{
  "components": [
    {
      "best_practices": [
        "Ensure the remarkable_device_token.txt file exists and contains valid credentials before running this test",
        "Create the test_uploads directory and place a valid PDF file named test_document.pdf in it",
        "This function is intended for testing purposes only and should not be used in production code",
        "The function prints status messages to stdout, making it suitable for manual testing but not for automated test suites without output capture",
        "Consider wrapping this in a proper unit test framework (pytest, unittest) for better integration with CI/CD pipelines",
        "The function uses relative paths based on __file__, so it must be run as a script or from the correct working directory",
        "Handle the boolean return value appropriately in calling code to determine test success or failure"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 01:00:04",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "upload_manager"
      ],
      "description": "A test function that performs a quick upload of a PDF document to a reMarkable tablet without performing a full synchronization.",
      "docstring": "Quick test without full sync",
      "id": 2136,
      "imports": [
        "import os",
        "import sys",
        "import json",
        "from pathlib import Path",
        "from upload_manager import RemarkableUploadManager"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from upload_manager import RemarkableUploadManager"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 49,
      "line_start": 9,
      "name": "test_quick_upload_v1",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a lightweight integration test for the reMarkable upload functionality. It authenticates with the reMarkable cloud service, locates a test PDF file, and uploads it to the root folder of the device. It's designed for rapid testing during development without the overhead of a complete sync operation.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the PDF upload was successful, False if authentication failed, the test PDF file was not found, or the upload operation failed. Note that the function may also return None implicitly if an exception occurs during upload (though the exception is caught and False is printed).",
      "settings_required": [
        "A file named 'remarkable_device_token.txt' must exist in the same directory as the test script, containing valid reMarkable device authentication token",
        "A test PDF file must exist at './test_uploads/test_document.pdf' relative to the script location",
        "The upload_manager module must be available and contain the RemarkableUploadManager class",
        "Valid reMarkable cloud service credentials configured in the device token file"
      ],
      "source_code": "def test_quick_upload():\n    \"\"\"Quick test without full sync\"\"\"\n    print(\"\ud83d\ude80 Quick PDF Upload Test\")\n    print(\"=\" * 50)\n    \n    # Initialize upload manager\n    device_token_path = Path(__file__).parent / \"remarkable_device_token.txt\"\n    upload_manager = RemarkableUploadManager(device_token_path)\n    \n    # Authenticate\n    print(\"\ud83d\udd11 Authenticating...\")\n    if not upload_manager.authenticate():\n        print(\"\u274c Authentication failed\")\n        return False\n    print(\"\u2705 Authentication successful\")\n    \n    # Create test document\n    test_pdf = Path(__file__).parent / \"test_uploads\" / \"test_document.pdf\"\n    if not test_pdf.exists():\n        print(f\"\u274c Test PDF not found: {test_pdf}\")\n        return False\n    \n    # Upload the PDF\n    print(f\"\ud83d\udcc4 Uploading: {test_pdf.name}\")\n    try:\n        success = upload_manager.upload_pdf_document(\n            pdf_file=test_pdf,\n            name=\"QuickUploadTest\",\n            parent_uuid=\"\"  # Root folder\n        )\n        \n        if success:\n            print(\"\u2705 Upload successful!\")\n            return True\n        else:\n            print(\"\u274c Upload failed\")\n            return False\n            \n    except Exception as e:\n        print(f\"\u274c Upload error: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/quick_upload_test.py",
      "tags": [
        "testing",
        "pdf-upload",
        "remarkable",
        "integration-test",
        "file-upload",
        "cloud-sync",
        "authentication",
        "quick-test"
      ],
      "updated_at": "2025-12-07T02:00:04.933045",
      "usage_example": "# Ensure prerequisites are met:\n# 1. Create remarkable_device_token.txt with your device token\n# 2. Create test_uploads/test_document.pdf\n\nfrom pathlib import Path\nfrom upload_manager import RemarkableUploadManager\n\n# Run the test\nresult = test_quick_upload()\n\nif result:\n    print(\"Test passed successfully\")\nelse:\n    print(\"Test failed\")"
    },
    {
      "best_practices": [
        "This function modifies sys.path at runtime which can affect module resolution - use with caution in production code",
        "The function has a lazy import of RemarkableAuth which may hide import errors until runtime",
        "Ensure the database path 'remarkable_replica_v2/replica_database.json' exists or handle creation appropriately",
        "The function returns True/False but doesn't handle the actual operations shown in the menu - it's only a demo scaffold",
        "Consider adding error handling for database path access and RemarkableUploadManager initialization",
        "The function assumes RemarkableUploadManager is defined elsewhere in the module - ensure it's available before calling main()",
        "For production use, consider making the database path configurable rather than hardcoded"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported inside the function at runtime, required for authentication",
          "import": "from auth import RemarkableAuth",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:59:48",
      "decorators": [],
      "dependencies": [
        "sys",
        "pathlib",
        "auth",
        "requests",
        "json",
        "hashlib",
        "uuid",
        "base64",
        "binascii",
        "zlib",
        "datetime",
        "time",
        "crc32c",
        "re",
        "local_replica_v2"
      ],
      "description": "Demo function that showcases the reMarkable upload functionality by authenticating a user session and initializing an upload manager with available operations menu.",
      "docstring": "Demo of upload functionality",
      "id": 2135,
      "imports": [
        "import os",
        "import json",
        "import hashlib",
        "import requests",
        "import uuid",
        "import base64",
        "import binascii",
        "import zlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Tuple",
        "from datetime import datetime",
        "import time",
        "import crc32c",
        "import sys",
        "from auth import RemarkableAuth",
        "import re",
        "from local_replica_v2 import RemarkableReplicaBuilder",
        "from local_replica_v2 import RemarkableReplicaBuilder"
      ],
      "imports_required": [
        "from pathlib import Path",
        "import sys"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 1153,
      "line_start": 1128,
      "name": "main_v67",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a demonstration and entry point for the reMarkable Upload Manager. It handles authentication through RemarkableAuth, creates a session, initializes the RemarkableUploadManager with a database path, and displays a menu of available operations (edit metadata, upload PDF, create notebook). It's designed to be run as a standalone demo to test the upload functionality.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if authentication succeeds and the upload manager is initialized successfully, False if authentication fails. The function primarily serves as a demo and doesn't perform actual operations beyond setup and menu display.",
      "settings_required": [
        "RemarkableAuth configuration (likely requires reMarkable API credentials or authentication tokens)",
        "Database file at path 'remarkable_replica_v2/replica_database.json' must exist or be creatable",
        "RemarkableUploadManager class must be defined in the same module or imported",
        "Valid reMarkable account credentials for authentication"
      ],
      "source_code": "def main():\n    \"\"\"Demo of upload functionality\"\"\"\n    import sys\n    sys.path.insert(0, str(Path(__file__).parent))\n    \n    from auth import RemarkableAuth\n    \n    # Authenticate\n    auth = RemarkableAuth()\n    session = auth.get_authenticated_session()\n    \n    if not session:\n        print(\"\u274c Authentication failed\")\n        return False\n    \n    # Initialize upload manager\n    database_path = \"remarkable_replica_v2/replica_database.json\"\n    uploader = RemarkableUploadManager(session, database_path)\n    \n    print(\"\ud83d\ude80 reMarkable Upload Manager Demo\")\n    print(\"Available operations:\")\n    print(\"1. Edit document metadata\")\n    print(\"2. Upload PDF document\")\n    print(\"3. Create new notebook\")\n    \n    return True",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/upload_manager.py",
      "tags": [
        "demo",
        "authentication",
        "remarkable",
        "upload-manager",
        "initialization",
        "session-management",
        "entry-point",
        "cli-menu",
        "document-management"
      ],
      "updated_at": "2025-12-07T01:59:48.529715",
      "usage_example": "# Ensure RemarkableUploadManager is defined in the same module\n# Ensure auth.py module exists with RemarkableAuth class\n# Ensure database directory exists\nimport os\nos.makedirs('remarkable_replica_v2', exist_ok=True)\n\n# Run the demo\nif __name__ == '__main__':\n    success = main()\n    if success:\n        print('Demo initialized successfully')\n    else:\n        print('Demo failed to initialize')"
    },
    {
      "best_practices": [
        "Ensure the print_client function is properly defined and handles connection errors gracefully",
        "Validate server address and queue name before calling this function to avoid connection failures",
        "Consider wrapping this function call in try-except blocks to handle potential network or connection errors",
        "The function assumes print_client returns an object with a send_job() method - ensure this contract is maintained",
        "Consider adding timeout parameters in kwargs to prevent indefinite blocking on network operations",
        "Ensure obj_uid is unique to prevent job conflicts or overwrites in the queue system"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:16:12",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "socket"
      ],
      "description": "Initializes a TCP messenger client and sends a job to a server queue for processing graph-related tasks with specified label types and language settings.",
      "docstring": null,
      "id": 1906,
      "imports": [
        "import asyncio",
        "import socket"
      ],
      "imports_required": [
        "import asyncio",
        "import socket"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 144,
      "line_start": 142,
      "name": "main_v117",
      "parameters": [
        {
          "annotation": null,
          "default": null,
          "kind": "positional_or_keyword",
          "name": "graph"
        },
        {
          "annotation": null,
          "default": null,
          "kind": "positional_or_keyword",
          "name": "server"
        },
        {
          "annotation": null,
          "default": null,
          "kind": "positional_or_keyword",
          "name": "queue"
        },
        {
          "annotation": null,
          "default": null,
          "kind": "positional_or_keyword",
          "name": "labeltype"
        },
        {
          "annotation": null,
          "default": null,
          "kind": "positional_or_keyword",
          "name": "language"
        },
        {
          "annotation": null,
          "default": null,
          "kind": "positional_or_keyword",
          "name": "obj_uid"
        },
        {
          "annotation": null,
          "default": null,
          "kind": "var_keyword",
          "name": "**kwargs"
        }
      ],
      "parameters_explained": {
        "**kwargs": "Additional keyword arguments passed through to the print_client function. Allows for extensibility and additional configuration options without modifying the function signature.",
        "graph": "Graph data structure or identifier representing the data/document to be processed. Expected to be compatible with the print_client function's requirements.",
        "labeltype": "Type or format of labels to be used in the printing/processing operation. Specifies how labels should be rendered or formatted.",
        "language": "Language code or identifier for localization purposes. Determines the language used for labels, text rendering, or document generation.",
        "obj_uid": "Unique identifier for the object being processed. Used to track and identify the specific job or document in the system.",
        "queue": "Queue identifier or name where the job should be submitted. Used to route the job to the appropriate processing queue on the server.",
        "server": "Server address or hostname where the print client should connect. Typically a string containing IP address or domain name."
      },
      "parent_class": null,
      "purpose": "This function serves as an entry point for creating a print client connection to a server, configuring it with graph data, queue information, label types, and language settings, then triggering the job submission. It appears to be part of a distributed printing or document processing system where jobs are queued and processed remotely via TCP connections.",
      "return_annotation": null,
      "return_explained": "This function does not explicitly return a value (implicitly returns None). The function's purpose is to perform side effects by creating a TCP messenger and sending a job, rather than returning data.",
      "settings_required": [
        "The print_client function must be defined and accessible in the same module or imported",
        "Server must be reachable and accepting TCP connections",
        "Appropriate network permissions and firewall rules to connect to the specified server",
        "Server-side queue configuration matching the provided queue parameter"
      ],
      "source_code": "def main(graph, server, queue, labeltype, language, obj_uid, **kwargs):\n    tcp_messenger = print_client(graph, server, queue, labeltype, language, obj_uid, **kwargs)\n    tcp_messenger.send_job()",
      "source_file": "/tf/active/vicechatdev/resources/printclient.py",
      "tags": [
        "tcp-client",
        "messaging",
        "job-submission",
        "print-client",
        "queue",
        "networking",
        "distributed-system",
        "graph-processing",
        "label-printing",
        "localization"
      ],
      "updated_at": "2025-12-07T01:59:48.529067",
      "usage_example": "# Assuming print_client is defined elsewhere in the module\n# Example usage:\ngraph_data = {'nodes': [1, 2, 3], 'edges': [(1, 2), (2, 3)]}\nserver_address = '192.168.1.100'\nqueue_name = 'print_queue_1'\nlabel_type = 'barcode'\nlanguage_code = 'en-US'\nobject_uid = 'doc-12345'\n\n# Call the main function\nmain(\n    graph=graph_data,\n    server=server_address,\n    queue=queue_name,\n    labeltype=label_type,\n    language=language_code,\n    obj_uid=object_uid,\n    timeout=30,\n    retry_count=3\n)"
    },
    {
      "best_practices": [
        "This function should be called from an if __name__ == '__main__': block to prevent execution when imported as a module",
        "The function depends on send_test_email being defined elsewhere in the codebase - ensure this dependency is available",
        "Use sys.exit(main()) to properly propagate the exit code to the operating system",
        "The --to argument is required; all other arguments have sensible defaults for local testing",
        "Default SMTP port 2525 is commonly used for testing/development; production systems typically use port 25, 465, or 587",
        "Consider adding error handling for invalid email addresses or network connectivity issues in production use"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 17:37:41",
      "decorators": [],
      "dependencies": [
        "argparse",
        "smtplib",
        "sys",
        "email.mime.text",
        "email.mime.multipart",
        "email.mime.application"
      ],
      "description": "Command-line interface function that parses arguments and sends a test email through an SMTP forwarder service, displaying connection details and returning an exit code based on success.",
      "docstring": null,
      "id": 1480,
      "imports": [
        "import smtplib",
        "import sys",
        "import argparse",
        "from email.mime.text import MIMEText",
        "from email.mime.multipart import MIMEMultipart",
        "from email.mime.application import MIMEApplication"
      ],
      "imports_required": [
        "import argparse",
        "import smtplib",
        "import sys",
        "from email.mime.text import MIMEText",
        "from email.mime.multipart import MIMEMultipart",
        "from email.mime.application import MIMEApplication"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 68,
      "line_start": 40,
      "name": "main_v116",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the entry point for a test client application that validates email forwarding functionality. It configures an argument parser to accept SMTP connection parameters, sender/recipient information, and email content, then invokes the send_test_email function with these parameters. The function is designed for testing and debugging email forwarder services by providing a simple CLI tool to send test emails with customizable parameters.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 if the email was sent successfully (when send_test_email returns True), or 1 if the email sending failed (when send_test_email returns False). This follows standard Unix convention for command-line tool exit codes.",
      "settings_required": [
        "Requires a send_test_email function to be defined in the same module or imported",
        "SMTP server must be accessible at the specified host and port (default: localhost:2525)",
        "Recipient email address must be provided via --to argument"
      ],
      "source_code": "def main():\n    parser = argparse.ArgumentParser(description='Send test email to forwarder service')\n    parser.add_argument('--host', default='localhost', help='SMTP host (default: localhost)')\n    parser.add_argument('--port', default=2525, type=int, help='SMTP port (default: 2525)')\n    parser.add_argument('--from', dest='sender', default='test@example.com', help='Sender email')\n    parser.add_argument('--to', dest='recipient', required=True, help='Recipient email')\n    parser.add_argument('--subject', default='Test Email from Forwarder', help='Email subject')\n    parser.add_argument('--message', default='This is a test email from the email forwarder service.', help='Email message')\n    \n    args = parser.parse_args()\n    \n    print(\"Email Forwarder Test Client\")\n    print(\"=\" * 30)\n    print(f\"SMTP Server: {args.host}:{args.port}\")\n    print(f\"From: {args.sender}\")\n    print(f\"To: {args.recipient}\")\n    print(f\"Subject: {args.subject}\")\n    print()\n    \n    success = send_test_email(\n        smtp_host=args.host,\n        smtp_port=args.port,\n        sender=args.sender,\n        recipient=args.recipient,\n        subject=args.subject,\n        message=args.message\n    )\n    \n    return 0 if success else 1",
      "source_file": "/tf/active/vicechatdev/email-forwarder/send_test_email.py",
      "tags": [
        "cli",
        "email",
        "smtp",
        "testing",
        "command-line",
        "argparse",
        "email-forwarder",
        "test-client",
        "entry-point",
        "main-function"
      ],
      "updated_at": "2025-12-07T01:59:48.528429",
      "usage_example": "# Basic usage with required recipient\nif __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)\n\n# Command line examples:\n# python script.py --to recipient@example.com\n# python script.py --host smtp.example.com --port 587 --from sender@test.com --to recipient@example.com --subject \"Custom Subject\" --message \"Custom message body\""
    },
    {
      "best_practices": [
        "This function is hardcoded to debug a specific session ID ('3fea9b6e-92ea-462a-ba67-996f251e39db'). Modify the session_id variable to debug different sessions.",
        "Ensure the database connection is properly configured before running this function to avoid connection errors.",
        "This function accesses a private method (_get_execution_tracking) which may change in future versions of the service.",
        "Use this function in development/debugging environments only, not in production code.",
        "The function assumes the session exists in the database; add error handling if using with dynamic session IDs.",
        "Consider parameterizing the session_id if this function will be reused for multiple sessions."
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 16:09:53",
      "decorators": [],
      "dependencies": [
        "services.StatisticalAnalysisService",
        "config.Config"
      ],
      "description": "A debugging utility function that analyzes and displays execution tracking information for a specific session in a statistical analysis service.",
      "docstring": null,
      "id": 1224,
      "imports": [
        "import sys",
        "from services import StatisticalAnalysisService",
        "from config import Config"
      ],
      "imports_required": [
        "from services import StatisticalAnalysisService",
        "from config import Config"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 35,
      "line_start": 9,
      "name": "main_v115",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a diagnostic tool to inspect the execution steps and metadata of a statistical analysis session. It retrieves session steps from the database, displays detailed information about each step (including type, ID, success status, and metadata), and tests the internal execution tracking method. This is primarily used for debugging and verifying that session execution data is being properly stored and retrieved.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It produces console output displaying session step information and execution tracking results.",
      "settings_required": [
        "Config class must be properly configured with database connection settings",
        "Database must be accessible and contain the session with ID '3fea9b6e-92ea-462a-ba67-996f251e39db'",
        "StatisticalAnalysisService must have a database_manager attribute with get_session_steps method",
        "StatisticalAnalysisService must have a _get_execution_tracking method"
      ],
      "source_code": "def main():\n    config = Config()\n    service = StatisticalAnalysisService(config)\n    \n    session_id = '3fea9b6e-92ea-462a-ba67-996f251e39db'\n    print(f\"Debugging execution tracking for session: {session_id}\")\n    \n    # Get steps directly\n    steps = service.database_manager.get_session_steps(session_id)\n    print(f\"\\nFound {len(steps)} steps:\")\n    \n    for i, step in enumerate(steps):\n        print(f\"  Step {i+1}: {step.step_type}\")\n        print(f\"    ID: {step.step_id}\")\n        print(f\"    Success: {step.execution_success}\")\n        print(f\"    Has metadata: {bool(step.metadata)}\")\n        if step.metadata:\n            print(f\"    Metadata keys: {list(step.metadata.keys())}\")\n            if 'execution_log' in step.metadata:\n                print(f\"    Execution log entries: {len(step.metadata['execution_log'])}\")\n        print()\n    \n    # Test the execution tracking method\n    print(\"Testing _get_execution_tracking method:\")\n    result = service._get_execution_tracking(session_id)\n    print(f\"Result keys: {list(result.keys())}\")\n    print(f\"Result: {result}\")",
      "source_file": "/tf/active/vicechatdev/full_smartstat/debug_execution_tracking.py",
      "tags": [
        "debugging",
        "diagnostics",
        "session-tracking",
        "execution-analysis",
        "database-inspection",
        "logging",
        "metadata-inspection",
        "statistical-analysis",
        "development-tool"
      ],
      "updated_at": "2025-12-07T01:59:48.527730",
      "usage_example": "# Ensure config.py and services module are available\n# from config import Config\n# from services import StatisticalAnalysisService\n\ndef main():\n    config = Config()\n    service = StatisticalAnalysisService(config)\n    \n    session_id = '3fea9b6e-92ea-462a-ba67-996f251e39db'\n    print(f\"Debugging execution tracking for session: {session_id}\")\n    \n    steps = service.database_manager.get_session_steps(session_id)\n    print(f\"\\nFound {len(steps)} steps:\")\n    \n    for i, step in enumerate(steps):\n        print(f\"  Step {i+1}: {step.step_type}\")\n        print(f\"    ID: {step.step_id}\")\n        print(f\"    Success: {step.execution_success}\")\n        print(f\"    Has metadata: {bool(step.metadata)}\")\n        if step.metadata:\n            print(f\"    Metadata keys: {list(step.metadata.keys())}\")\n            if 'execution_log' in step.metadata:\n                print(f\"    Execution log entries: {len(step.metadata['execution_log'])}\")\n        print()\n    \n    print(\"Testing _get_execution_tracking method:\")\n    result = service._get_execution_tracking(session_id)\n    print(f\"Result keys: {list(result.keys())}\")\n    print(f\"Result: {result}\")\n\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point of the test script",
        "The return value should be used as the process exit code to indicate test success/failure to CI/CD systems",
        "All test functions called by main() must be defined before calling main()",
        "Test functions should raise AssertionError for test failures to be properly caught and reported",
        "The function provides clear visual feedback with separator lines and status symbols (\u2713 and \u2717)",
        "Error handling includes full traceback printing for debugging unexpected exceptions"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only when an exception occurs during test execution",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 10:46:30",
      "decorators": [],
      "dependencies": [
        "pandas",
        "pathlib",
        "traceback"
      ],
      "description": "Test runner function that executes a suite of regional format handling tests for CSV parsing, including European and US number formats with various delimiters.",
      "docstring": null,
      "id": 477,
      "imports": [
        "import os",
        "import sys",
        "import pandas as pd",
        "from pathlib import Path",
        "from smartstat_service import smart_read_csv",
        "from smartstat_service import convert_european_decimals",
        "import traceback"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "import pandas as pd",
        "from pathlib import Path",
        "from smartstat_service import smart_read_csv",
        "from smartstat_service import convert_european_decimals"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 210,
      "line_start": 186,
      "name": "main_v114",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for a test suite that validates CSV parsing functionality across different regional formats. It sequentially runs tests for European CSV (comma as decimal separator), US CSV (period as decimal separator), formats with thousands separators, and tab-delimited files. The function provides formatted console output showing test progress and results, returning 0 for success or 1 for failure.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 if all tests pass successfully, 1 if any test fails (either through AssertionError or any other Exception). This follows standard Unix convention for process exit codes.",
      "settings_required": [
        "The test functions (test_european_csv, test_us_csv, test_european_with_thousands, test_us_with_thousands, test_tab_delimited_european) must be defined in the same module or imported",
        "The smartstat_service module must be available with smart_read_csv and convert_european_decimals functions",
        "Test CSV files must be present in expected locations for the test functions to work"
      ],
      "source_code": "def main():\n    print(\"\\n\" + \"=\"*60)\n    print(\"Regional Format Handling Tests\")\n    print(\"=\"*60)\n    \n    try:\n        test_european_csv()\n        test_us_csv()\n        test_european_with_thousands()\n        test_us_with_thousands()\n        test_tab_delimited_european()\n        \n        print(\"\\n\" + \"=\"*60)\n        print(\"\u2713 ALL TESTS PASSED!\")\n        print(\"=\"*60 + \"\\n\")\n        return 0\n        \n    except AssertionError as e:\n        print(f\"\\n\u2717 TEST FAILED: {e}\\n\")\n        return 1\n    except Exception as e:\n        print(f\"\\n\u2717 ERROR: {e}\\n\")\n        import traceback\n        traceback.print_exc()\n        return 1",
      "source_file": "/tf/active/vicechatdev/vice_ai/test_regional_formats.py",
      "tags": [
        "testing",
        "csv-parsing",
        "regional-formats",
        "test-runner",
        "data-validation",
        "european-format",
        "us-format",
        "number-formatting",
        "integration-tests"
      ],
      "updated_at": "2025-12-07T01:59:48.527098",
      "usage_example": "if __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "This function is hardcoded with specific document hashes and metadata - it's designed for a specific debugging scenario and should be adapted for general use",
        "The function makes multiple HTTP requests sequentially; consider adding rate limiting or error handling for production use",
        "Authentication credentials should be properly secured in the RemarkableAuth implementation",
        "The function prints directly to stdout; consider using logging module for better control in production environments",
        "Error handling uses broad exception catching which may hide specific issues; consider more granular exception handling for production code",
        "The document dictionary is hardcoded and should be externalized to a configuration file for reusability",
        "Consider adding retry logic for network requests to handle transient failures"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:36:37",
      "decorators": [],
      "dependencies": [
        "auth",
        "json"
      ],
      "description": "Analyzes and compares .content files for PDF documents stored in reMarkable cloud storage, identifying differences between working and non-working documents.",
      "docstring": null,
      "id": 2073,
      "imports": [
        "from auth import RemarkableAuth",
        "import json"
      ],
      "imports_required": [
        "from auth import RemarkableAuth",
        "import json"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 118,
      "line_start": 6,
      "name": "main_v113",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This diagnostic function authenticates with the reMarkable cloud service, downloads .content metadata files for four specific PDF documents (two working, two broken), parses their JSON structure, and performs a detailed comparison to identify what makes some documents visible/working while others are not. It's designed for debugging document upload/visibility issues in the reMarkable ecosystem.",
      "return_annotation": null,
      "return_explained": "This function returns None (implicitly). It performs side effects by printing diagnostic information to stdout, including authentication status, document metadata, JSON content structure, and comparative analysis between working and broken documents.",
      "settings_required": [
        "RemarkableAuth class must be available and properly configured with authentication credentials",
        "Network access to eu.tectonic.remarkable.com API endpoint",
        "Valid reMarkable cloud account credentials (handled by RemarkableAuth)",
        "The auth module must implement get_authenticated_session() method that returns a requests-compatible session object"
      ],
      "source_code": "def main():\n    auth = RemarkableAuth()\n    session = auth.get_authenticated_session()\n    \n    if not session:\n        print(\"\u274c Authentication failed\")\n        return\n    \n    print(\"\ud83d\udcc4 COMPARING .CONTENT FILES FOR ALL 4 PDF DOCUMENTS\")\n    print(\"=\" * 70)\n    \n    # Document info from the log file\n    documents = {\n        'invoice_poulpharm': {\n            'name': 'invoice poulpharm june 2025',\n            'content_hash': '4843f8d18f154198752eef85dbefb3c8d2d9984fe84e70d13857f5a7d61dcff3',\n            'working': True,\n            'size': 720\n        },\n        'pylontech': {\n            'name': 'Pylontech force H3 datasheet',\n            'content_hash': 'feb1654a645e7d42eea63bb8f87a1888026fd3ac197aa725fa3d77ae8b3e1e8c',\n            'working': True,\n            'size': 831\n        },\n        'upload_test_1': {\n            'name': 'UploadTest_1753969395',\n            'content_hash': '1ea64a7fb8fdd227cff533ea190a74d5111656f57699db714d33f69aba4404d5',\n            'working': False,\n            'size': 741\n        },\n        'upload_test_2': {\n            'name': 'UploadTest_1753968602',\n            'content_hash': 'ddc9459da5fc01058d854c85e3879b05c145e82189f4dd409bdc5d88014ad5e5',\n            'working': False,\n            'size': 741\n        }\n    }\n    \n    content_data = {}\n    \n    for doc_key, doc_info in documents.items():\n        print(f\"\\n\ud83d\udd0d {doc_info['name']} ({'\u2705 WORKING' if doc_info['working'] else '\u274c NOT VISIBLE'})\")\n        print(f\"   Content hash: {doc_info['content_hash']}\")\n        print(f\"   Expected size: {doc_info['size']} bytes\")\n        \n        try:\n            # Download the .content file\n            content_response = session.get(f\"https://eu.tectonic.remarkable.com/sync/v3/files/{doc_info['content_hash']}\")\n            content_response.raise_for_status()\n            content_text = content_response.text\n            \n        
    print(f\"   Actual size: {len(content_text)} bytes\")\n            print(f\"   Size match: {'\u2705' if len(content_text) == doc_info['size'] else '\u274c'}\")\n            \n            # Parse JSON\n            try:\n                content_json = json.loads(content_text)\n                content_data[doc_key] = content_json\n                \n                print(f\"   \ud83d\udcca JSON Content:\")\n                print(f\"      fileType: {content_json.get('fileType', 'MISSING')}\")\n                print(f\"      pageCount: {content_json.get('pageCount', 'MISSING')}\")\n                print(f\"      originalPageCount: {content_json.get('originalPageCount', 'MISSING')}\")\n                print(f\"      sizeInBytes: {content_json.get('sizeInBytes', 'MISSING')}\")\n                print(f\"      formatVersion: {content_json.get('formatVersion', 'MISSING')}\")\n                print(f\"      orientation: {content_json.get('orientation', 'MISSING')}\")\n                print(f\"      pages array: {len(content_json.get('pages', []))} items\")\n                if content_json.get('pages'):\n                    print(f\"         First page UUID: {content_json['pages'][0]}\")\n                print(f\"      redirectionPageMap: {content_json.get('redirectionPageMap', 'MISSING')}\")\n                \n            except json.JSONDecodeError as e:\n                print(f\"   \u274c Invalid JSON: {e}\")\n                print(f\"   Raw content: {repr(content_text[:200])}\")\n                \n        except Exception as e:\n            print(f\"   \u274c Failed to download: {e}\")\n        \n        print(\"-\" * 50)\n    \n    # Compare working vs non-working\n    print(\"\\n\ud83d\udd0d DETAILED COMPARISON: WORKING vs NON-WORKING\")\n    print(\"=\" * 70)\n    \n    working_docs = [k for k, v in documents.items() if v['working']]\n    broken_docs = [k for k, v in documents.items() if not v['working']]\n    \n    print(f\"Working documents: {[documents[k]['name'] 
for k in working_docs]}\")\n    print(f\"Broken documents: {[documents[k]['name'] for k in broken_docs]}\")\n    \n    if working_docs and broken_docs:\n        print(\"\\n\ud83d\udd0d Key Differences Analysis:\")\n        \n        # Compare first working vs first broken\n        working_content = content_data.get(working_docs[0], {})\n        broken_content = content_data.get(broken_docs[0], {})\n        \n        print(f\"\\nComparing {documents[working_docs[0]]['name']} (working) vs {documents[broken_docs[0]]['name']} (broken):\")\n        \n        all_keys = set(working_content.keys()) | set(broken_content.keys())\n        for key in sorted(all_keys):\n            working_val = working_content.get(key, \"MISSING\")\n            broken_val = broken_content.get(key, \"MISSING\")\n            \n            if working_val != broken_val:\n                print(f\"   \ud83d\udd25 DIFFERENCE - {key}:\")\n                print(f\"      Working: {working_val}\")\n                print(f\"      Broken:  {broken_val}\")\n            else:\n                print(f\"   \u2705 SAME - {key}: {working_val}\")\n    \n    print(f\"\\n\ud83d\udcbe Content files analysis complete!\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_content_files.py",
      "tags": [
        "remarkable",
        "cloud-storage",
        "pdf-analysis",
        "debugging",
        "api-client",
        "document-metadata",
        "json-parsing",
        "comparison",
        "diagnostic",
        "file-download"
      ],
      "updated_at": "2025-12-07T01:59:48.526407",
      "usage_example": "# Ensure auth.py is available with RemarkableAuth class\n# from auth import RemarkableAuth\n# import json\n\nif __name__ == '__main__':\n    main()\n\n# Output will be printed to console showing:\n# - Authentication status\n# - Document metadata for 4 PDFs\n# - JSON content structure for each document\n# - Comparative analysis between working and broken documents"
    },
    {
      "best_practices": [
        "Always use --dry-run flag first to preview changes before modifying file timestamps",
        "Ensure the calling script defines fix_file_dates() and process_directory() functions before calling main()",
        "Use pattern matching (--pattern) to limit processing to specific file types",
        "The function expects to be called with no arguments as it uses argparse to parse sys.argv",
        "Error handling exits with code 1 for file not found errors",
        "Either --file or directory argument must be provided, but not both",
        "Default behavior is recursive directory traversal unless --no-recursive is specified"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 22:11:53",
      "decorators": [],
      "dependencies": [
        "argparse",
        "os",
        "sys",
        "pathlib",
        "datetime",
        "subprocess"
      ],
      "description": "Entry point function that parses command-line arguments to fix file timestamps by setting them to the oldest date found, either for a single file or recursively through a directory.",
      "docstring": null,
      "id": 1830,
      "imports": [
        "import os",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "import subprocess",
        "import argparse"
      ],
      "imports_required": [
        "import argparse",
        "import os",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "import subprocess"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 238,
      "line_start": 165,
      "name": "main_v112",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This main function serves as the CLI interface for a file date fixing utility. It configures an argument parser to handle various options including directory/file processing, pattern matching, dry-run mode, and recursive/non-recursive directory traversal. The function validates inputs and delegates to either fix_file_dates() for single files or process_directory() for directory operations.",
      "return_annotation": null,
      "return_explained": "This function does not return a value (implicitly returns None). It either exits with sys.exit(1) on error or completes execution after calling the appropriate processing functions.",
      "settings_required": [
        "Requires fix_file_dates() function to be defined in the same module for processing single files",
        "Requires process_directory() function to be defined in the same module for processing directories",
        "File system read/write permissions for the target files and directories"
      ],
      "source_code": "def main():\n    parser = argparse.ArgumentParser(\n        description=\"Fix file dates by setting all timestamps to the oldest date\",\n        formatter_class=argparse.RawDescriptionHelpFormatter,\n        epilog=\"\"\"\nExamples:\n  # Dry run on all files in a directory (recursive by default)\n  python3 fix_file_dates.py /path/to/folder --dry-run\n\n  # Actually fix the dates for all files in directory\n  python3 fix_file_dates.py /path/to/folder\n\n  # Process with custom pattern\n  python3 fix_file_dates.py /path/to/folder --pattern \"*.pdf\"\n\n  # Process only specific pattern\n  python3 fix_file_dates.py /path/to/folder --pattern \"*_fully_signed.pdf\"\n\n  # Process single file\n  python3 fix_file_dates.py --file /path/to/file.pdf\n\n  # Process without recursing subdirectories\n  python3 fix_file_dates.py /path/to/folder --no-recursive\n        \"\"\"\n    )\n    \n    parser.add_argument(\n        'directory',\n        nargs='?',\n        help='Directory to process (required unless using --file)'\n    )\n    \n    parser.add_argument(\n        '--pattern',\n        default='*',\n        help='File pattern to match (default: * = all files)'\n    )\n    \n    parser.add_argument(\n        '--file',\n        help='Process a single file instead of a directory'\n    )\n    \n    parser.add_argument(\n        '--dry-run',\n        action='store_true',\n        help='Show what would be done without making changes'\n    )\n    \n    parser.add_argument(\n        '--no-recursive',\n        action='store_true',\n        help='Do not search subdirectories'\n    )\n    \n    args = parser.parse_args()\n    \n    if args.file:\n        # Process single file\n        if not os.path.exists(args.file):\n            print(f\"Error: File {args.file} does not exist\")\n            sys.exit(1)\n        fix_file_dates(args.file, args.dry_run)\n    else:\n        # Process directory\n        if not args.directory:\n            parser.error(\"directory 
is required unless using --file\")\n        \n        process_directory(\n            args.directory,\n            pattern=args.pattern,\n            dry_run=args.dry_run,\n            recursive=not args.no_recursive\n        )",
      "source_file": "/tf/active/vicechatdev/mailsearch/fix_file_dates.py",
      "tags": [
        "cli",
        "command-line",
        "argparse",
        "file-processing",
        "timestamp-management",
        "entry-point",
        "file-dates",
        "directory-processing",
        "dry-run",
        "pattern-matching"
      ],
      "updated_at": "2025-12-07T01:59:48.525714",
      "usage_example": "# This function is typically called as the script entry point:\n# if __name__ == '__main__':\n#     main()\n\n# Command-line usage examples:\n# python3 fix_file_dates.py /path/to/folder --dry-run\n# python3 fix_file_dates.py /path/to/folder --pattern '*.pdf'\n# python3 fix_file_dates.py --file /path/to/file.pdf\n# python3 fix_file_dates.py /path/to/folder --no-recursive"
    },
    {
      "best_practices": [
        "This function should be run with appropriate permissions to delete directories within the configured scripts folder",
        "The function is platform-dependent and requires Unix-like systems (Linux/macOS) due to the 'du' command usage",
        "Consider backing up important data before running this cleanup operation",
        "The function performs cleanup on ALL sessions without confirmation - ensure this is the intended behavior before execution",
        "Monitor the output carefully to verify that the correct number of venvs are being cleaned and projects are preserved",
        "The function uses subprocess to call system commands which may have security implications if paths are not properly validated",
        "Exit codes should be properly handled by the calling process to determine if cleanup was successful"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 12:42:22",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "subprocess",
        "smartstat_config",
        "agent_executor"
      ],
      "description": "A cleanup utility function that removes virtual environment directories from all SmartStat sessions while preserving project files, reporting disk space freed and cleanup statistics.",
      "docstring": null,
      "id": 772,
      "imports": [
        "import os",
        "import sys",
        "from pathlib import Path",
        "from smartstat_config import Config",
        "from agent_executor import AgentExecutor",
        "import subprocess"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from smartstat_config import Config",
        "from agent_executor import AgentExecutor",
        "import subprocess"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 55,
      "line_start": 12,
      "name": "main_v111",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for a comprehensive cleanup operation across all SmartStat sessions. It initializes the necessary configuration and agent executor, measures disk usage before and after cleanup, executes the cleanup of virtual environment directories across all sessions, and provides detailed reporting of the cleanup results including space freed, number of venvs cleaned, and projects preserved. This is useful for maintenance tasks to reclaim disk space by removing temporary virtual environments while keeping important project files intact.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 for successful completion, 1 if cleanup failed. This follows standard Unix convention for command-line tools where 0 indicates success and non-zero indicates failure.",
      "settings_required": [
        "Config class must be properly configured with GENERATED_SCRIPTS_FOLDER attribute pointing to the directory containing SmartStat session folders",
        "AgentExecutor class must be available and properly implement the cleanup_venv_directories() method",
        "The 'du' command must be available on the system (Unix/Linux/macOS) for disk usage calculation",
        "Appropriate file system permissions to read directory sizes and delete virtual environment directories"
      ],
      "source_code": "def main():\n    # Initialize config and agent executor\n    config = Config()\n    agent_executor = AgentExecutor(config)\n    \n    print(f\"\\n{'='*60}\")\n    print(f\"Running FULL cleanup on all SmartStat sessions\")\n    print(f\"{'='*60}\\n\")\n    \n    # Get total size before\n    import subprocess\n    scripts_dir = Path(config.GENERATED_SCRIPTS_FOLDER)\n    result = subprocess.run(['du', '-sh', str(scripts_dir)], capture_output=True, text=True)\n    size_before = result.stdout.split()[0]\n    print(f\"Total size BEFORE cleanup: {size_before}\")\n    \n    # Count sessions and projects\n    session_count = len([d for d in scripts_dir.iterdir() if d.is_dir()])\n    print(f\"Total sessions: {session_count}\\n\")\n    \n    # Run cleanup on all sessions\n    print(\"Running cleanup...\")\n    cleanup_result = agent_executor.cleanup_venv_directories()  # No session_id = clean all\n    \n    if cleanup_result['success']:\n        print(f\"\\n\u2713 Cleanup completed successfully!\")\n        print(f\"  - Total venvs cleaned: {cleanup_result['cleaned_count']}\")\n        print(f\"  - Total projects preserved: {cleanup_result['preserved_count']}\")\n        print(f\"  - Total space freed: {cleanup_result['space_freed_mb'] / 1024:.2f} GB\")\n    else:\n        print(f\"\\n\u2717 Cleanup failed: {cleanup_result.get('error', 'Unknown error')}\")\n        return 1\n    \n    # Get size after\n    result = subprocess.run(['du', '-sh', str(scripts_dir)], capture_output=True, text=True)\n    size_after = result.stdout.split()[0]\n    print(f\"\\nTotal size AFTER cleanup: {size_after}\")\n    \n    print(f\"\\n{'='*60}\")\n    print(f\"Full cleanup completed successfully!\")\n    print(f\"Before: {size_before} \u2192 After: {size_after}\")\n    print(f\"{'='*60}\\n\")\n    \n    return 0",
      "source_file": "/tf/active/vicechatdev/vice_ai/run_full_cleanup.py",
      "tags": [
        "cleanup",
        "maintenance",
        "disk-space",
        "virtual-environment",
        "venv",
        "session-management",
        "file-system",
        "utility",
        "smartstat",
        "batch-processing"
      ],
      "updated_at": "2025-12-07T01:59:48.525035",
      "usage_example": "if __name__ == '__main__':\n    import sys\n    from pathlib import Path\n    from smartstat_config import Config\n    from agent_executor import AgentExecutor\n    import subprocess\n    \n    # Run the cleanup\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "This function is hardcoded to test a specific session ID and should be modified or parameterized for production use",
        "Requires Unix-like system with 'du' command; will not work on Windows without modification",
        "Should be run with appropriate file system permissions to access and clean session directories",
        "The function performs destructive operations (deleting venv directories) and should only be run on test data",
        "Consider adding error handling for subprocess calls that might fail on different systems",
        "The test session ID should exist before running this function, or the function will return early with error code 1",
        "Output is verbose and designed for manual inspection; consider adding structured logging for automated testing"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 11:14:52",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "subprocess",
        "smartstat_config",
        "agent_executor"
      ],
      "description": "A test function that validates the cleanup functionality of virtual environments in project directories by testing on a specific session, measuring disk space before/after cleanup, and verifying that important files are preserved.",
      "docstring": null,
      "id": 563,
      "imports": [
        "import os",
        "import sys",
        "from pathlib import Path",
        "from smartstat_config import Config",
        "from agent_executor import AgentExecutor",
        "import subprocess"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from smartstat_config import Config",
        "from agent_executor import AgentExecutor",
        "import subprocess"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 86,
      "line_start": 12,
      "name": "main_v110",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a comprehensive integration test for the venv cleanup feature. It tests the AgentExecutor's cleanup_venv_directories method on a hardcoded test session ID, measuring disk space usage before and after cleanup, verifying that virtual environments are removed while preserving critical project files (analysis_results.json, analysis_script.py). The function provides detailed console output showing the cleanup process and results.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 for successful cleanup test completion, 1 if the session directory is not found or if cleanup fails. This follows standard Unix exit code conventions where 0 indicates success.",
      "settings_required": [
        "Config object must be properly configured with GENERATED_SCRIPTS_FOLDER attribute pointing to the directory containing session folders",
        "Session directory with ID '558edb65-de39-403f-85c3-06ebfe8fa252' must exist in the GENERATED_SCRIPTS_FOLDER",
        "Unix-like system with 'du' command available for disk usage measurement",
        "Proper file system permissions to read session directories and execute cleanup operations"
      ],
      "source_code": "def main():\n    # Initialize config and agent executor\n    config = Config()\n    agent_executor = AgentExecutor(config)\n    \n    # Test cleanup on oldest session\n    test_session_id = \"558edb65-de39-403f-85c3-06ebfe8fa252\"\n    \n    print(f\"\\n{'='*60}\")\n    print(f\"Testing cleanup on session: {test_session_id}\")\n    print(f\"{'='*60}\\n\")\n    \n    # Check size before cleanup\n    session_dir = Path(config.GENERATED_SCRIPTS_FOLDER) / test_session_id  # Use GENERATED_SCRIPTS_FOLDER\n    if not session_dir.exists():\n        print(f\"ERROR: Session directory not found: {session_dir}\")\n        return 1\n    \n    # Get size before\n    import subprocess\n    result = subprocess.run(['du', '-sh', str(session_dir)], capture_output=True, text=True)\n    size_before = result.stdout.split()[0]\n    print(f\"Session size BEFORE cleanup: {size_before}\")\n    \n    # List projects\n    projects = [d for d in session_dir.iterdir() if d.is_dir() and d.name.startswith('project_')]\n    print(f\"Found {len(projects)} project(s) in session:\\n\")\n    \n    for proj in projects:\n        proj_result = subprocess.run(['du', '-sh', str(proj)], capture_output=True, text=True)\n        proj_size = proj_result.stdout.split()[0]\n        venv_path = proj / 'venv'\n        has_venv = '\u2713' if venv_path.exists() else '\u2717'\n        print(f\"  {proj.name}: {proj_size} [venv: {has_venv}]\")\n    \n    # Run cleanup\n    print(f\"\\nRunning cleanup...\")\n    cleanup_result = agent_executor.cleanup_venv_directories(test_session_id)\n    \n    if cleanup_result['success']:\n        print(f\"\\n\u2713 Cleanup completed successfully!\")\n        print(f\"  - Cleaned venvs: {cleanup_result['cleaned_count']}\")\n        print(f\"  - Space freed: {cleanup_result['space_freed_mb']:.2f} MB\")\n        print(f\"  - Preserved projects: {cleanup_result['preserved_count']}\")\n    else:\n        print(f\"\\n\u2717 Cleanup failed: 
{cleanup_result.get('error', 'Unknown error')}\")\n        return 1\n    \n    # Get size after\n    result = subprocess.run(['du', '-sh', str(session_dir)], capture_output=True, text=True)\n    size_after = result.stdout.split()[0]\n    print(f\"\\nSession size AFTER cleanup: {size_after}\")\n    \n    # Verify venvs are gone but projects remain\n    print(f\"\\nVerifying cleanup:\\n\")\n    for proj in projects:\n        proj_result = subprocess.run(['du', '-sh', str(proj)], capture_output=True, text=True)\n        proj_size = proj_result.stdout.split()[0]\n        venv_path = proj / 'venv'\n        has_venv = '\u2713' if venv_path.exists() else '\u2717'\n        print(f\"  {proj.name}: {proj_size} [venv: {has_venv}]\")\n        \n        # Check that important files are preserved\n        analysis_file = proj / 'analysis_results.json'\n        script_file = proj / 'analysis_script.py'\n        if analysis_file.exists():\n            print(f\"    \u2713 analysis_results.json preserved\")\n        if script_file.exists():\n            print(f\"    \u2713 analysis_script.py preserved\")\n    \n    print(f\"\\n{'='*60}\")\n    print(f\"Cleanup test completed successfully!\")\n    print(f\"{'='*60}\\n\")\n    \n    return 0",
      "source_file": "/tf/active/vicechatdev/vice_ai/test_cleanup.py",
      "tags": [
        "testing",
        "cleanup",
        "virtual-environment",
        "venv",
        "disk-space",
        "integration-test",
        "file-system",
        "session-management",
        "project-cleanup",
        "validation"
      ],
      "updated_at": "2025-12-07T01:59:48.524308",
      "usage_example": "if __name__ == '__main__':\n    import sys\n    from pathlib import Path\n    from smartstat_config import Config\n    from agent_executor import AgentExecutor\n    import subprocess\n    \n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "Ensure the 'test_files' list is populated with valid file paths before calling this function",
        "Implement the required helper functions (test_pptx_file, test_docx_file, test_libreoffice_conversion) before using this function",
        "Install LibreOffice on the system to enable the conversion fallback feature",
        "Consider adding error handling for the case where helper functions are not defined",
        "The function modifies no state and only produces console output, making it safe for repeated execution",
        "Use this function as part of a test suite or diagnostic tool rather than in production code",
        "Consider capturing the results dictionary for programmatic access instead of relying solely on console output"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:59:06",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "traceback",
        "python-pptx",
        "python-docx",
        "subprocess",
        "tempfile",
        "sys"
      ],
      "description": "A test harness function that validates the ability to open and process PowerPoint and Word document files, with fallback to LibreOffice conversion for problematic files.",
      "docstring": null,
      "id": 350,
      "imports": [
        "import sys",
        "from pathlib import Path",
        "import traceback",
        "import pptx",
        "from docx import Document as DocxDocument",
        "import subprocess",
        "import tempfile"
      ],
      "imports_required": [
        "import sys",
        "from pathlib import Path",
        "import traceback",
        "import pptx",
        "from docx import Document as DocxDocument",
        "import subprocess",
        "import tempfile"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 211,
      "line_start": 161,
      "name": "main_v109",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a comprehensive testing utility for document file processing. It iterates through a predefined list of test files (PPTX, PPT, DOCX, DOC, DOCM formats), attempts to open them using native Python libraries (python-pptx and python-docx), and falls back to LibreOffice conversion if direct opening fails. It provides detailed console output with status indicators and generates a summary report of all test results.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). Instead, it prints test results to the console and displays a summary table showing the status of each tested file with visual indicators (\u2713 for pass, \u274c for fail, \u26a0\ufe0f for pass with conversion, \u2753 for not found).",
      "settings_required": [
        "A global variable 'test_files' must be defined containing a list of file paths to test",
        "Functions 'test_pptx_file(file_path)' and 'test_docx_file(file_path)' must be defined in the same module",
        "Function 'test_libreoffice_conversion(file_path)' must be defined in the same module",
        "LibreOffice must be installed on the system for the conversion fallback to work",
        "Test files must exist at the paths specified in the 'test_files' list"
      ],
      "source_code": "def main():\n    print(\"=\"*80)\n    print(\"TESTING PROBLEMATIC FILES\")\n    print(\"=\"*80)\n    \n    results = {}\n    \n    for file_path in test_files:\n        file_path_obj = Path(file_path)\n        \n        if not file_path_obj.exists():\n            print(f\"\\n\u274c Skipping non-existent file: {file_path_obj.name}\")\n            results[file_path_obj.name] = \"NOT_FOUND\"\n            continue\n            \n        ext = file_path_obj.suffix.lower()\n        \n        if ext in ['.pptx', '.ppt']:\n            success = test_pptx_file(file_path)\n            results[file_path_obj.name] = \"PASS\" if success else \"FAIL\"\n            \n            # If direct opening failed, try LibreOffice conversion\n            if not success:\n                print(f\"\\nTrying LibreOffice conversion as fallback...\")\n                conv_success = test_libreoffice_conversion(file_path)\n                if conv_success:\n                    results[file_path_obj.name] = \"PASS_WITH_CONVERSION\"\n                    \n        elif ext in ['.docx', '.doc', '.docm']:\n            success = test_docx_file(file_path)\n            results[file_path_obj.name] = \"PASS\" if success else \"FAIL\"\n            \n            # If direct opening failed, try LibreOffice conversion\n            if not success:\n                print(f\"\\nTrying LibreOffice conversion as fallback...\")\n                conv_success = test_libreoffice_conversion(file_path)\n                if conv_success:\n                    results[file_path_obj.name] = \"PASS_WITH_CONVERSION\"\n    \n    # Print summary\n    print(\"\\n\" + \"=\"*80)\n    print(\"SUMMARY\")\n    print(\"=\"*80)\n    for filename, status in results.items():\n        status_icon = {\n            \"PASS\": \"\u2713\",\n            \"FAIL\": \"\u274c\",\n            \"PASS_WITH_CONVERSION\": \"\u26a0\ufe0f\",\n            \"NOT_FOUND\": \"\u2753\"\n        }.get(status, \"?\")\n        
print(f\"{status_icon} {filename}: {status}\")",
      "source_file": "/tf/active/vicechatdev/docchat/test_problematic_files.py",
      "tags": [
        "testing",
        "document-processing",
        "file-validation",
        "powerpoint",
        "word",
        "pptx",
        "docx",
        "libreoffice",
        "conversion",
        "test-harness",
        "file-handling",
        "error-handling"
      ],
      "updated_at": "2025-12-07T01:59:48.523619",
      "usage_example": "# Define required dependencies first\ntest_files = [\n    '/path/to/presentation.pptx',\n    '/path/to/document.docx',\n    '/path/to/legacy.ppt'\n]\n\ndef test_pptx_file(file_path):\n    try:\n        prs = pptx.Presentation(file_path)\n        return True\n    except:\n        return False\n\ndef test_docx_file(file_path):\n    try:\n        doc = DocxDocument(file_path)\n        return True\n    except:\n        return False\n\ndef test_libreoffice_conversion(file_path):\n    try:\n        result = subprocess.run(['libreoffice', '--headless', '--convert-to', 'pdf', file_path], capture_output=True)\n        return result.returncode == 0\n    except:\n        return False\n\n# Run the test suite\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "Ensure all required environment variables (especially OPENAI_API_KEY) are set before calling this function",
        "Verify that the ImprovedProjectVictoriaGenerator class is properly defined with a run_complete_pipeline method",
        "Check that all required data sources and PDF files are accessible before execution",
        "Handle the returned output_path appropriately, checking if the file was successfully created",
        "Consider wrapping the call in try-except blocks to handle potential errors from the pipeline execution",
        "Ensure sufficient disk space for ChromaDB storage and generated output files",
        "Monitor API usage and costs when using OpenAI services through this function"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 08:32:26",
      "decorators": [],
      "dependencies": [
        "os",
        "re",
        "json",
        "tiktoken",
        "typing",
        "datetime",
        "chromadb",
        "langchain_openai",
        "sentence_transformers",
        "fitz",
        "OneCo_hybrid_RAG"
      ],
      "description": "Entry point function that instantiates an ImprovedProjectVictoriaGenerator and executes its complete pipeline to generate disclosure documents.",
      "docstring": "Main function to run the improved disclosure generator.",
      "id": 99,
      "imports": [
        "import os",
        "import re",
        "import json",
        "import tiktoken",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Tuple",
        "from datetime import datetime",
        "import chromadb",
        "from langchain_openai import ChatOpenAI",
        "from sentence_transformers import CrossEncoder",
        "import fitz",
        "from OneCo_hybrid_RAG import MyEmbeddingFunction"
      ],
      "imports_required": [
        "import os",
        "import re",
        "import json",
        "import tiktoken",
        "from typing import List, Dict, Any, Tuple",
        "from datetime import datetime",
        "import chromadb",
        "from langchain_openai import ChatOpenAI",
        "from sentence_transformers import CrossEncoder",
        "import fitz",
        "from OneCo_hybrid_RAG import MyEmbeddingFunction"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 793,
      "line_start": 789,
      "name": "main_v108",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for running the improved disclosure generator system. It creates an instance of ImprovedProjectVictoriaGenerator, executes the full pipeline for generating disclosure documents (likely involving RAG-based document generation using embeddings and LLMs), and returns the path to the generated output file. This is typically used as the primary execution function when running the disclosure generation system.",
      "return_annotation": null,
      "return_explained": "Returns a string representing the file path (output_path) where the generated disclosure document has been saved. The exact format and location depend on the ImprovedProjectVictoriaGenerator's configuration and run_complete_pipeline method implementation.",
      "settings_required": [
        "OPENAI_API_KEY environment variable (required for langchain_openai.ChatOpenAI)",
        "ImprovedProjectVictoriaGenerator class must be defined and importable in the same module or imported",
        "OneCo_hybrid_RAG module must be available with MyEmbeddingFunction class",
        "ChromaDB database configuration and initialization",
        "PDF files or data sources required by the ImprovedProjectVictoriaGenerator pipeline",
        "Sentence transformer model files for CrossEncoder (downloaded automatically on first use)",
        "Tiktoken encoding files (downloaded automatically on first use)"
      ],
      "source_code": "def main():\n    \"\"\"Main function to run the improved disclosure generator.\"\"\"\n    generator = ImprovedProjectVictoriaGenerator()\n    output_path = generator.run_complete_pipeline()\n    return output_path",
      "source_file": "/tf/active/vicechatdev/improved_project_victoria_generator.py",
      "tags": [
        "entry-point",
        "main-function",
        "disclosure-generation",
        "RAG",
        "document-generation",
        "pipeline",
        "LLM",
        "embeddings",
        "chromadb",
        "langchain",
        "project-victoria"
      ],
      "updated_at": "2025-12-07T01:59:48.523001",
      "usage_example": "# Ensure all environment variables are set\nimport os\nos.environ['OPENAI_API_KEY'] = 'your-api-key-here'\n\n# Import the function (assuming it's in disclosure_generator.py)\nfrom disclosure_generator import main\n\n# Run the disclosure generator\noutput_file_path = main()\nprint(f'Disclosure document generated at: {output_file_path}')"
    },
    {
      "best_practices": [
        "Ensure the Poulpharm_labosoft.json schema file exists and is valid before running this function",
        "Set up required API keys (likely OpenAI) as environment variables before execution",
        "This function is intended for demonstration and testing purposes, not production use",
        "The function uses exception handling to gracefully handle schema loading errors and query generation failures",
        "The max_rows parameter is set to 100 to limit result sets during demonstration",
        "Review the generated SQL queries before executing them against a production database",
        "The function prints extensive output with emojis for readability - consider redirecting output if logging to files",
        "Each example query demonstrates different aspects of the database schema (requests, statistics, results, contacts, analysis groups)"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 18:05:58",
      "decorators": [],
      "dependencies": [
        "sql_query_generator"
      ],
      "description": "Demonstrates the SmartStat SQL Workflow by loading a database schema, initializing a SQL query generator, and generating SQL queries from natural language requests for various laboratory data analysis scenarios.",
      "docstring": null,
      "id": 1518,
      "imports": [
        "import sys",
        "import os",
        "from sql_query_generator import SQLQueryGenerator",
        "from sql_query_generator import DatabaseSchema",
        "from sql_query_generator import ConnectionConfig"
      ],
      "imports_required": [
        "from sql_query_generator import SQLQueryGenerator",
        "from sql_query_generator import DatabaseSchema",
        "from sql_query_generator import ConnectionConfig"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 74,
      "line_start": 13,
      "name": "main_v107",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a demonstration and testing tool for the SmartStat SQL workflow system. It showcases the complete pipeline from loading a database schema (Poulpharm_labosoft.json) to generating SQL queries from natural language requests. The function demonstrates five example use cases including laboratory requests, customer statistics, bacteriology results, veterinarian information, and analysis groups. It's designed to validate the integration between natural language processing and SQL query generation before deployment in the SmartStat Flask application.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects by printing demonstration output to the console, including schema information, generated SQL queries, explanations, and metadata for each example query. The function may return early (None) if the schema fails to load.",
      "settings_required": [
        "Poulpharm_labosoft.json file must exist in the current working directory containing the database schema definition",
        "The SQLQueryGenerator class must be properly configured with any required API keys or credentials (likely OpenAI API key for natural language processing)",
        "Database connection configuration may be required depending on SQLQueryGenerator implementation"
      ],
      "source_code": "def main():\n    print(\"\ud83d\ude80 SmartStat SQL Workflow Demonstration\\n\")\n    \n    # Load the database schema\n    print(\"\ud83d\udccb Loading database schema...\")\n    try:\n        schema = DatabaseSchema.from_json(\"Poulpharm_labosoft.json\")\n        print(f\"\u2705 Loaded schema for: {schema.database_name}\")\n        print(f\"   Description: {schema.description}\")\n        print(f\"   Tables: {len(schema.complete_table_list)}\")\n        print()\n    except Exception as e:\n        print(f\"\u274c Error loading schema: {e}\")\n        return\n    \n    # Initialize the SQL query generator\n    print(\"\ud83d\udd27 Initializing SQL query generator...\")\n    query_generator = SQLQueryGenerator(schema)\n    print(\"\u2705 Query generator ready\\n\")\n    \n    # Example queries to demonstrate the workflow\n    example_queries = [\n        \"Show me recent laboratory requests with sample information from the last month\",\n        \"Get customer statistics including number of requests and most common tests\",\n        \"Find bacteriology results with antibiotic sensitivity data\",\n        \"List veterinarians and their associated practices with contact information\",\n        \"Show analysis groups and their associated individual analyses\"\n    ]\n    \n    print(\"\ud83e\uddea Generating SQL queries for example analysis requests:\\n\")\n    \n    for i, user_query in enumerate(example_queries, 1):\n        print(f\"\ud83d\udcdd Example {i}: {user_query}\")\n        print(\"-\" * 80)\n        \n        try:\n            # Generate SQL query\n            sql_query, metadata = query_generator.generate_sql_query(user_query, max_rows=100)\n            \n            print(f\"\ud83d\udca1 Explanation: {metadata['explanation']}\")\n            print(\"\\n\ud83d\udd0d Generated SQL Query:\")\n            print(\"```sql\")\n            print(sql_query)\n            print(\"```\")\n            \n            print(f\"\\n\ud83d\udcca 
Metadata:\")\n            print(f\"   Database: {metadata['database_name']}\")\n            print(f\"   Max rows: {metadata['max_rows']}\")\n            print(f\"   Generated at: {metadata['generated_at']}\")\n            \n        except Exception as e:\n            print(f\"\u274c Error generating query: {e}\")\n        \n        print(\"\\n\" + \"=\"*80 + \"\\n\")\n    \n    print(\"\ud83c\udfaf Workflow Summary:\")\n    print(\"1. User provides natural language analysis request\")\n    print(\"2. AI analyzes request against database schema\")\n    print(\"3. Appropriate SQL query is generated\")\n    print(\"4. Query is executed to retrieve relevant data\")\n    print(\"5. Data continues through normal analysis pipeline\")\n    print(\"\\n\u2728 Ready to integrate with SmartStat Flask application!\")",
      "source_file": "/tf/active/vicechatdev/smartstat/demo_sql_workflow.py",
      "tags": [
        "demonstration",
        "sql-generation",
        "natural-language-processing",
        "database-schema",
        "workflow",
        "laboratory-data",
        "query-generator",
        "testing",
        "example",
        "smartstat",
        "veterinary",
        "bacteriology"
      ],
      "updated_at": "2025-12-07T01:59:48.522274",
      "usage_example": "# Ensure Poulpharm_labosoft.json exists in current directory\n# Set any required environment variables (e.g., OPENAI_API_KEY)\n\nfrom sql_query_generator import SQLQueryGenerator, DatabaseSchema, ConnectionConfig\n\ndef main():\n    print(\"\ud83d\ude80 SmartStat SQL Workflow Demonstration\\n\")\n    \n    print(\"\ud83d\udccb Loading database schema...\")\n    try:\n        schema = DatabaseSchema.from_json(\"Poulpharm_labosoft.json\")\n        print(f\"\u2705 Loaded schema for: {schema.database_name}\")\n        print(f\"   Description: {schema.description}\")\n        print(f\"   Tables: {len(schema.complete_table_list)}\")\n        print()\n    except Exception as e:\n        print(f\"\u274c Error loading schema: {e}\")\n        return\n    \n    print(\"\ud83d\udd27 Initializing SQL query generator...\")\n    query_generator = SQLQueryGenerator(schema)\n    print(\"\u2705 Query generator ready\\n\")\n    \n    example_queries = [\n        \"Show me recent laboratory requests with sample information from the last month\",\n        \"Get customer statistics including number of requests and most common tests\"\n    ]\n    \n    print(\"\ud83e\uddea Generating SQL queries for example analysis requests:\\n\")\n    \n    for i, user_query in enumerate(example_queries, 1):\n        print(f\"\ud83d\udcdd Example {i}: {user_query}\")\n        print(\"-\" * 80)\n        \n        try:\n            sql_query, metadata = query_generator.generate_sql_query(user_query, max_rows=100)\n            \n            print(f\"\ud83d\udca1 Explanation: {metadata['explanation']}\")\n            print(\"\\n\ud83d\udd0d Generated SQL Query:\")\n            print(\"sql\")\n            print(sql_query)\n            print(\"\")\n            \n            print(f\"\\n\ud83d\udcca Metadata:\")\n            print(f\"   Database: {metadata['database_name']}\")\n            print(f\"   Max rows: {metadata['max_rows']}\")\n            print(f\"   Generated at: 
{metadata['generated_at']}\")\n            \n        except Exception as e:\n            print(f\"\u274c Error generating query: {e}\")\n        \n        print(\"\\n\" + \"=\"*80 + \"\\n\")\n\nif __name__ == \"__main__\":\n    main()"
    },
    {
      "best_practices": [
        "Ensure the database schema JSON file exists and is properly formatted before running this function",
        "The function expects a specific schema file name ('database_schema_20251003_120434.json') - modify this if using a different schema file",
        "This is a demonstration function intended for testing and documentation purposes, not for production use",
        "Error handling is implemented for schema loading and query generation, but errors are only printed to console",
        "The function uses a hardcoded list of example queries - customize these based on your specific use case",
        "The max_rows parameter is set to 100 for all queries - adjust this based on expected data volume",
        "Console output uses emoji characters for visual clarity - ensure your terminal supports UTF-8 encoding",
        "The function demonstrates the complete workflow but does not actually execute the generated SQL queries against a database"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 16:22:16",
      "decorators": [],
      "dependencies": [
        "sql_query_generator"
      ],
      "description": "Demonstrates a SmartStat SQL workflow by loading a database schema, initializing a SQL query generator, and generating SQL queries from natural language requests with detailed output and metadata.",
      "docstring": null,
      "id": 1248,
      "imports": [
        "import sys",
        "import os",
        "from sql_query_generator import SQLQueryGenerator",
        "from sql_query_generator import DatabaseSchema",
        "from sql_query_generator import ConnectionConfig"
      ],
      "imports_required": [
        "from sql_query_generator import SQLQueryGenerator",
        "from sql_query_generator import DatabaseSchema",
        "from sql_query_generator import ConnectionConfig"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 74,
      "line_start": 13,
      "name": "main_v106",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This is a demonstration/testing function that showcases the complete workflow of the SmartStat SQL query generation system. It loads a database schema from a JSON file, creates a SQLQueryGenerator instance, and processes multiple example natural language queries to demonstrate how user requests are converted into SQL queries. The function provides detailed console output with emojis for visual clarity, showing the schema loading process, query generation results, explanations, and metadata for each example query. It serves as both a testing tool and documentation of the intended workflow for integration with the SmartStat Flask application.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects by printing demonstration output to the console. If the schema file cannot be loaded, the function returns early without processing queries.",
      "settings_required": [
        "A database schema JSON file named 'database_schema_20251003_120434.json' must exist in the current working directory",
        "The sql_query_generator module must be available in the Python path",
        "The schema JSON file must contain valid database schema information including database_name, description, and complete_table_list"
      ],
      "source_code": "def main():\n    print(\"\ud83d\ude80 SmartStat SQL Workflow Demonstration\\n\")\n    \n    # Load the database schema\n    print(\"\ud83d\udccb Loading database schema...\")\n    try:\n        schema = DatabaseSchema.from_json(\"database_schema_20251003_120434.json\")\n        print(f\"\u2705 Loaded schema for: {schema.database_name}\")\n        print(f\"   Description: {schema.description}\")\n        print(f\"   Tables: {len(schema.complete_table_list)}\")\n        print()\n    except Exception as e:\n        print(f\"\u274c Error loading schema: {e}\")\n        return\n    \n    # Initialize the SQL query generator\n    print(\"\ud83d\udd27 Initializing SQL query generator...\")\n    query_generator = SQLQueryGenerator(schema)\n    print(\"\u2705 Query generator ready\\n\")\n    \n    # Example queries to demonstrate the workflow\n    example_queries = [\n        \"Show me recent laboratory requests with sample information from the last month\",\n        \"Get customer statistics including number of requests and most common tests\",\n        \"Find bacteriology results with antibiotic sensitivity data\",\n        \"List veterinarians and their associated practices with contact information\",\n        \"Show analysis groups and their associated individual analyses\"\n    ]\n    \n    print(\"\ud83e\uddea Generating SQL queries for example analysis requests:\\n\")\n    \n    for i, user_query in enumerate(example_queries, 1):\n        print(f\"\ud83d\udcdd Example {i}: {user_query}\")\n        print(\"-\" * 80)\n        \n        try:\n            # Generate SQL query\n            sql_query, metadata = query_generator.generate_sql_query(user_query, max_rows=100)\n            \n            print(f\"\ud83d\udca1 Explanation: {metadata['explanation']}\")\n            print(\"\\n\ud83d\udd0d Generated SQL Query:\")\n            print(\"```sql\")\n            print(sql_query)\n            print(\"```\")\n            \n            
print(f\"\\n\ud83d\udcca Metadata:\")\n            print(f\"   Database: {metadata['database_name']}\")\n            print(f\"   Max rows: {metadata['max_rows']}\")\n            print(f\"   Generated at: {metadata['generated_at']}\")\n            \n        except Exception as e:\n            print(f\"\u274c Error generating query: {e}\")\n        \n        print(\"\\n\" + \"=\"*80 + \"\\n\")\n    \n    print(\"\ud83c\udfaf Workflow Summary:\")\n    print(\"1. User provides natural language analysis request\")\n    print(\"2. AI analyzes request against database schema\")\n    print(\"3. Appropriate SQL query is generated\")\n    print(\"4. Query is executed to retrieve relevant data\")\n    print(\"5. Data continues through normal analysis pipeline\")\n    print(\"\\n\u2728 Ready to integrate with SmartStat Flask application!\")",
      "source_file": "/tf/active/vicechatdev/full_smartstat/demo_sql_workflow.py",
      "tags": [
        "demonstration",
        "workflow",
        "sql-generation",
        "natural-language-processing",
        "database-schema",
        "query-generator",
        "testing",
        "console-output",
        "smartstat",
        "example-queries",
        "metadata",
        "laboratory-data"
      ],
      "updated_at": "2025-12-07T01:59:48.521518",
      "usage_example": "# Ensure the schema file exists in the current directory\n# database_schema_20251003_120434.json\n\nfrom sql_query_generator import SQLQueryGenerator, DatabaseSchema, ConnectionConfig\n\ndef main():\n    print(\"\ud83d\ude80 SmartStat SQL Workflow Demonstration\\n\")\n    \n    print(\"\ud83d\udccb Loading database schema...\")\n    try:\n        schema = DatabaseSchema.from_json(\"database_schema_20251003_120434.json\")\n        print(f\"\u2705 Loaded schema for: {schema.database_name}\")\n        print(f\"   Description: {schema.description}\")\n        print(f\"   Tables: {len(schema.complete_table_list)}\")\n        print()\n    except Exception as e:\n        print(f\"\u274c Error loading schema: {e}\")\n        return\n    \n    print(\"\ud83d\udd27 Initializing SQL query generator...\")\n    query_generator = SQLQueryGenerator(schema)\n    print(\"\u2705 Query generator ready\\n\")\n    \n    example_queries = [\n        \"Show me recent laboratory requests with sample information from the last month\"\n    ]\n    \n    print(\"\ud83e\uddea Generating SQL queries for example analysis requests:\\n\")\n    \n    for i, user_query in enumerate(example_queries, 1):\n        print(f\"\ud83d\udcdd Example {i}: {user_query}\")\n        print(\"-\" * 80)\n        \n        try:\n            sql_query, metadata = query_generator.generate_sql_query(user_query, max_rows=100)\n            print(f\"\ud83d\udca1 Explanation: {metadata['explanation']}\")\n            print(\"\\n\ud83d\udd0d Generated SQL Query:\")\n            print(\"sql\")\n            print(sql_query)\n            print(\"\")\n        except Exception as e:\n            print(f\"\u274c Error generating query: {e}\")\n\nif __name__ == \"__main__\":\n    main()"
    },
    {
      "best_practices": [
        "Ensure ChromaDB server is running before executing this function",
        "Use --skip-collections to avoid reprocessing already cleaned collections",
        "Adjust --similarity-threshold based on your data characteristics (higher values are more strict)",
        "The function includes a 1-second sleep between collections to avoid overwhelming the server",
        "Errors in individual collections are caught and logged but don't stop the entire process",
        "Monitor disk space as cleaned collections are created as new collections rather than modifying existing ones",
        "Consider using --skip-summarization for faster processing if summarization is not needed",
        "The function expects a clean_collection function to be defined elsewhere in the module"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "Required by the clean_collection function that this main function calls",
          "import": "from src.cleaners.hash_cleaner import HashCleaner",
          "optional": false
        },
        {
          "condition": "Required by the clean_collection function that this main function calls",
          "import": "from src.cleaners.similarity_cleaner import SimilarityCleaner",
          "optional": false
        },
        {
          "condition": "Required by the clean_collection function that this main function calls",
          "import": "from src.cleaners.combined_cleaner import CombinedCleaner",
          "optional": false
        },
        {
          "condition": "Required by the cleaning utilities",
          "import": "from src.utils.hash_utils import hash_text",
          "optional": false
        },
        {
          "condition": "Required by the cleaning utilities",
          "import": "from src.utils.similarity_utils import calculate_similarity",
          "optional": false
        },
        {
          "condition": "Required by the cleaning utilities",
          "import": "from src.clustering.text_clusterer import TextClusterer",
          "optional": false
        },
        {
          "condition": "Required for configuration settings",
          "import": "from src.config import Config",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 10:32:50",
      "decorators": [],
      "dependencies": [
        "argparse",
        "chromadb",
        "time",
        "tqdm",
        "src.cleaners.hash_cleaner",
        "src.cleaners.similarity_cleaner",
        "src.cleaners.combined_cleaner",
        "src.utils.hash_utils",
        "src.utils.similarity_utils",
        "src.clustering.text_clusterer",
        "src.config"
      ],
      "description": "Command-line interface function that orchestrates the cleaning of ChromaDB collections by removing duplicates and similar documents, with options to skip collections and customize the cleaning process.",
      "docstring": null,
      "id": 434,
      "imports": [
        "import argparse",
        "import chromadb",
        "from chromadb.config import Settings",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "import os",
        "import time",
        "from tqdm import tqdm",
        "from src.cleaners.hash_cleaner import HashCleaner",
        "from src.cleaners.similarity_cleaner import SimilarityCleaner",
        "from src.cleaners.combined_cleaner import CombinedCleaner",
        "from src.utils.hash_utils import hash_text",
        "from src.utils.similarity_utils import calculate_similarity",
        "from src.clustering.text_clusterer import TextClusterer",
        "from src.config import Config"
      ],
      "imports_required": [
        "import argparse",
        "import chromadb",
        "from chromadb.config import Settings",
        "import time",
        "from tqdm import tqdm"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 68,
      "line_start": 20,
      "name": "main_v105",
      "parameters": [],
      "parameters_explained": {
        "none": "This function takes no direct parameters. It uses argparse to parse command-line arguments including: --host (ChromaDB server host, default 'vice_chroma'), --port (ChromaDB server port, default 8000), --similarity-threshold (float threshold for detecting similar documents, default 0.95), --skip-collections (list of collection names to skip), --suffix (suffix for cleaned collection names, default '_clean'), and --skip-summarization (flag to skip summarization step)"
      },
      "parent_class": null,
      "purpose": "This is the main entry point for a ChromaDB collection cleaning utility. It connects to a ChromaDB instance, retrieves all collections, filters out collections to skip (including already cleaned ones), and processes each collection through a cleaning pipeline that removes duplicates and optionally summarizes similar documents. The cleaned data is stored in new collections with a configurable suffix.",
      "return_annotation": null,
      "return_explained": "Returns None. This function performs side effects by creating new cleaned collections in ChromaDB and printing progress information to stdout. Errors during collection cleaning are caught and printed but do not stop the overall process.",
      "settings_required": [
        "ChromaDB server must be running and accessible at the specified host and port",
        "The clean_collection function must be defined in the same module or imported",
        "Custom cleaner modules (HashCleaner, SimilarityCleaner, CombinedCleaner) must be available in src.cleaners",
        "Utility modules (hash_utils, similarity_utils) must be available in src.utils",
        "TextClusterer must be available in src.clustering",
        "Config module must be available in src.config",
        "Write permissions to create new collections in ChromaDB"
      ],
      "source_code": "def main():\n    # Parse command line arguments\n    parser = argparse.ArgumentParser(description='Clean up all ChromaDB collections')\n    parser.add_argument('--host', type=str, default='vice_chroma', help='ChromaDB host')\n    parser.add_argument('--port', type=int, default=8000, help='ChromaDB port')\n    parser.add_argument('--similarity-threshold', type=float, default=0.95, \n                        help='Similarity threshold for detecting similar documents')\n    parser.add_argument('--skip-collections', type=str, nargs='+', default=[], \n                        help='Collections to skip (e.g., already cleaned ones)')\n    parser.add_argument('--suffix', type=str, default='_clean', \n                        help='Suffix to add to cleaned collection names')\n    parser.add_argument('--skip-summarization', action='store_true', \n                        help='Skip the summarization step')\n    \n    args = parser.parse_args()\n    \n    # Connect to ChromaDB\n    client = chromadb.HttpClient(\n        host=args.host,\n        port=args.port,\n        settings=Settings(anonymized_telemetry=False)\n    )\n    \n    # Get all available collections\n    collection_names = client.list_collections()\n    \n    # Filter out collections to skip (e.g., already cleaned ones)\n    skip_suffix = args.suffix\n    to_process = [name for name in collection_names \n                 if not name.endswith(skip_suffix) and name not in args.skip_collections]\n    \n    print(f\"Found {len(collection_names)} total collections\")\n    print(f\"Will clean {len(to_process)} collections (skipping {len(collection_names) - len(to_process)})\")\n    \n    # Process each collection\n    for collection_name in tqdm(to_process, desc=\"Cleaning collections\"):\n        try:\n            clean_collection(\n                collection_name=collection_name,\n                output_collection=f\"{collection_name}{args.suffix}\",\n                host=args.host,\n                port=args.port,\n                similarity_threshold=args.similarity_threshold,\n                skip_summarization=args.skip_summarization\n            )\n            # Sleep briefly to avoid overwhelming the server\n            time.sleep(1)\n        except Exception as e:\n            print(f\"Error cleaning collection {collection_name}: {e}\")",
      "source_file": "/tf/active/vicechatdev/chromadb-cleanup/main.py",
      "tags": [
        "cli",
        "command-line",
        "chromadb",
        "database-cleaning",
        "deduplication",
        "similarity-detection",
        "batch-processing",
        "data-cleaning",
        "vector-database",
        "collection-management"
      ],
      "updated_at": "2025-12-07T01:59:48.520720",
      "usage_example": "# Run from command line:\n# python main.py --host localhost --port 8000 --similarity-threshold 0.95 --skip-collections collection1 collection2 --suffix _cleaned --skip-summarization\n\n# Or call directly in Python:\nif __name__ == '__main__':\n    main()\n\n# Example with custom arguments:\n# python main.py --host vice_chroma --port 8000 --similarity-threshold 0.90 --skip-collections already_clean_collection --suffix _v2"
    },
    {
      "best_practices": [
        "This function is hardcoded with specific UUIDs and hashes - it should be modified or parameterized for production use",
        "The function uses a timestamp-based naming scheme to avoid document name collisions",
        "Error handling is present but minimal - production code should have more robust error handling",
        "The function performs synchronous operations and may block for extended periods during upload and sync",
        "The test specifically validates using hash values as parent identifiers, which may be an edge case or alternative API usage pattern",
        "Ensure the RemarkableTestUpload and RemarkableReplicaBuilder classes are properly initialized with necessary credentials before calling",
        "The function assumes the target folder already exists; if the folder lookup returns nothing, it prints an error and returns without uploading",
        "Network connectivity is required throughout execution for authentication, upload, and sync operations"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:37:10",
      "decorators": [],
      "dependencies": [
        "sys",
        "os",
        "time",
        "requests"
      ],
      "description": "A test function that uploads a PDF document to a reMarkable tablet folder using the folder's hash value as the parent identifier instead of its UUID, then verifies the upload through replica synchronization.",
      "docstring": null,
      "id": 2075,
      "imports": [
        "import sys",
        "import os",
        "from upload_manager import UploadManager",
        "import time",
        "import requests"
      ],
      "imports_required": [
        "import sys",
        "import os",
        "from upload_manager import UploadManager",
        "import time",
        "import requests"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 84,
      "line_start": 14,
      "name": "main_v104",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a test/demonstration script for the reMarkable tablet upload functionality. It specifically tests whether using a folder's hash value (instead of UUID) as the parent identifier works correctly when uploading documents. The function authenticates with the reMarkable cloud service, verifies a target folder exists, uploads a test PDF with a timestamped name, and then builds a local replica to verify the upload succeeded.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects including printing status messages to console, uploading a document to reMarkable cloud, and building a local replica directory.",
      "settings_required": [
        "RemarkableTestUpload class must be defined and available in scope",
        "RemarkableReplicaBuilder class must be defined and available in scope",
        "Authentication credentials for reMarkable cloud service (likely stored in config or environment)",
        "Test PDF file must exist at the path specified by uploader.test_pdf_path",
        "Target folder with UUID '65aabcb0-94de-4e73-bb44-5f1e304c45a5' must exist in reMarkable account",
        "Network connectivity to reMarkable cloud services"
      ],
      "source_code": "def main():\n    print(\"\ud83d\udcc1 TESTING UPLOAD WITH HASH AS PARENT\")\n    print(\"=\" * 50)\n    \n    # Initialize uploader\n    uploader = RemarkableTestUpload()\n    \n    # Get folder info\n    myfolder_uuid = \"65aabcb0-94de-4e73-bb44-5f1e304c45a5\"\n    myfolder_hash = \"c9d2450e4584240a6a3e94237637861645a7ad9a3adc4a57a684f05399c75928\"\n    \n    print(f\"\ud83c\udfaf Target folder: Myfolder\")\n    print(f\"   UUID: {myfolder_uuid}\")\n    print(f\"   Hash: {myfolder_hash}\")\n    \n    # Authenticate\n    uploader.authenticate()\n    \n    # Verify folder exists\n    folder_info = uploader.get_node_info(myfolder_uuid)\n    if folder_info:\n        print(f\"\u2705 Found folder: {folder_info['metadata']['visibleName']}\")\n        print(f\"   Type: {folder_info['metadata']['type'].lower()}\")\n        print(f\"   Parent: {folder_info['metadata'].get('parent', 'root')}\")\n    else:\n        print(\"\u274c Folder not found!\")\n        return\n    \n    # Generate unique document name\n    timestamp = int(time.time())\n    doc_name = f\"HashParentTest_{timestamp}\"\n    \n    print(f\"\ud83d\udcc4 Uploading PDF: {doc_name}\")\n    print(f\"   Source file: {uploader.test_pdf_path}\")\n    print(f\"   Target folder: Myfolder (using HASH as parent)\")\n    \n    try:\n        # Upload with HASH as parent instead of UUID\n        result = uploader.upload_pdf_document(\n            pdf_path=uploader.test_pdf_path,\n            visible_name=doc_name,\n            parent_uuid=myfolder_hash  # Using HASH instead of UUID!\n        )\n        \n        if result:\n            print(f\"\u2705 Successfully uploaded PDF document: {doc_name}\")\n            print(f\"\ud83d\udd04 Document should appear in your device shortly after sync\")\n        else:\n            print(f\"\u274c Upload failed!\")\n            return\n            \n    except Exception as e:\n        print(f\"\u274c Upload error: {e}\")\n        return\n    \n    print(f\"\u2705 Upload to folder completed successfully!\")\n    print(f\"\ud83d\udd04 The document should now appear in Myfolder on your device\")\n    \n    # Run replica sync to verify\n    print(f\"\ud83d\udd04 Running replica sync to verify...\")\n    builder = RemarkableReplicaBuilder()\n    builder.build_replica()\n    \n    # Check if document appears in Myfolder\n    print(f\"\ud83d\udcc1 Myfolder contents after upload:\")\n    if os.path.exists(f\"{builder.replica_dir}/content/Myfolder\"):\n        for file in os.listdir(f\"{builder.replica_dir}/content/Myfolder\"):\n            if file.endswith('.pdf'):\n                print(f\"   \ud83d\udcc4 {file} (document)\")\n    else:\n        print(\"   \ud83d\udcc2 Myfolder directory not found\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_hash_parent_upload.py",
      "tags": [
        "remarkable",
        "tablet",
        "upload",
        "pdf",
        "cloud-sync",
        "testing",
        "file-management",
        "hash-identifier",
        "replica-sync",
        "document-upload"
      ],
      "updated_at": "2025-12-07T01:59:48.519931",
      "usage_example": "# Ensure required classes are imported/defined\n# from remarkable_upload import RemarkableTestUpload\n# from remarkable_replica import RemarkableReplicaBuilder\n\n# Simply call the function - it handles everything internally\nmain()\n\n# Expected output:\n# - Prints upload progress and status messages\n# - Uploads PDF to reMarkable folder 'Myfolder'\n# - Builds local replica to verify upload\n# - Lists contents of Myfolder after upload"
    },
    {
      "best_practices": [
        "This function should be called using asyncio.run(main()) or within an existing async context",
        "Ensure MIXED_AVAILABLE flag is properly set before calling this function",
        "All helper test functions (test_remarkable_auth, test_onedrive_auth, test_remarkable_discovery, test_mixed_mode_dry_run) must be defined and available",
        "Handle KeyboardInterrupt gracefully for user-initiated test cancellation",
        "The function uses sys.exit(1) which will terminate the entire program if MIXED_AVAILABLE is False",
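        "The test-mode flags (--test-auth, --test-discovery, --dry-run) are not declared mutually exclusive in argparse; they are checked in that order with if/elif, so when several are passed only the first match runs - specify one at a time",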
        "Proper authentication credentials must be configured before running tests",
        "The function prints formatted output with emoji indicators for visual clarity in terminal"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "Required for mixed cloud processing functionality; availability checked via MIXED_AVAILABLE flag",
          "import": "from mixed_cloud_processor import MixedCloudProcessor",
          "optional": false
        },
        {
          "condition": "Required for reMarkable cloud watching functionality",
          "import": "from mixed_cloud_processor import RemarkableCloudWatcher",
          "optional": false
        },
        {
          "condition": "Required for creating mixed processor instances",
          "import": "from mixed_cloud_processor import create_mixed_processor",
          "optional": false
        },
        {
          "condition": "Required for creating reMarkable session objects",
          "import": "from mixed_cloud_processor import create_remarkable_session",
          "optional": false
        },
        {
          "condition": "Required for OneDrive authentication and operations",
          "import": "from onedrive_client import OneDriveClient",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 23:51:24",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "argparse",
        "json",
        "sys",
        "pathlib",
        "logging",
        "traceback"
      ],
      "description": "Asynchronous main entry point for a test suite that validates Mixed Cloud Processor functionality, including authentication, discovery, and dry-run operations for reMarkable and OneDrive integration.",
      "docstring": null,
      "id": 1960,
      "imports": [
        "import asyncio",
        "import argparse",
        "import json",
        "import sys",
        "from pathlib import Path",
        "from mixed_cloud_processor import MixedCloudProcessor",
        "from mixed_cloud_processor import RemarkableCloudWatcher",
        "from mixed_cloud_processor import create_mixed_processor",
        "from mixed_cloud_processor import create_remarkable_session",
        "from onedrive_client import OneDriveClient",
        "import logging",
        "import traceback"
      ],
      "imports_required": [
        "import asyncio",
        "import argparse",
        "import json",
        "import sys",
        "from pathlib import Path",
        "import logging",
        "import traceback"
      ],
      "is_async": 1,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 239,
      "line_start": 176,
      "name": "main_v103",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the command-line interface for testing a mixed cloud processing system that integrates reMarkable tablet cloud storage with OneDrive. It provides multiple test modes: authentication-only testing, reMarkable folder discovery testing, dry-run mode for mixed processing, and a comprehensive test suite that runs all tests sequentially. The function handles command-line arguments, orchestrates different test scenarios, and provides formatted console output with status indicators.",
      "return_annotation": null,
      "return_explained": "This function does not explicitly return a value (implicitly returns None). It performs side effects by printing test results to console and may exit the program with sys.exit(1) if MIXED_AVAILABLE is False.",
      "settings_required": [
        "MIXED_AVAILABLE global variable must be defined (boolean flag indicating if mixed cloud processor is available)",
        "test_remarkable_auth() async function must be defined in the same module",
        "test_onedrive_auth() async function must be defined in the same module",
        "test_remarkable_discovery(session) async function must be defined in the same module",
        "test_mixed_mode_dry_run() async function must be defined in the same module",
        "create_remarkable_session() function must be available from mixed_cloud_processor module",
        "reMarkable cloud credentials (likely environment variables or config file)",
        "OneDrive authentication credentials (likely environment variables or config file)"
      ],
      "source_code": "async def main():\n    parser = argparse.ArgumentParser(description=\"Test Mixed Cloud Processor\")\n    parser.add_argument('--test-auth', action='store_true', help='Test authentication only')\n    parser.add_argument('--test-discovery', action='store_true', help='Test reMarkable folder discovery')\n    parser.add_argument('--dry-run', action='store_true', help='Test mixed mode without processing')\n    \n    args = parser.parse_args()\n    \n    if not MIXED_AVAILABLE:\n        print(\"\u274c Mixed cloud processor not available\")\n        sys.exit(1)\n    \n    print(\"\ud83e\uddea Mixed Cloud Processor Test Suite\")\n    print(\"=\" * 50)\n    \n    try:\n        if args.test_auth:\n            # Test authentication only\n            auth_success = await test_remarkable_auth()\n            onedrive_success = await test_onedrive_auth()\n            \n            if auth_success and onedrive_success:\n                print(\"\\n\u2705 All authentication tests passed\")\n            else:\n                print(\"\\n\u26a0\ufe0f Some authentication tests failed\")\n        \n        elif args.test_discovery:\n            # Test discovery\n            session = create_remarkable_session()\n            await test_remarkable_discovery(session)\n        \n        elif args.dry_run:\n            # Full dry run test\n            success = await test_mixed_mode_dry_run()\n            \n            if success:\n                print(\"\\n\u2705 Mixed mode dry run successful\")\n            else:\n                print(\"\\n\u274c Mixed mode dry run failed\")\n        \n        else:\n            # Run all tests\n            print(\"\ud83d\udd10 Authentication Tests\")\n            print(\"-\" * 30)\n            auth_success = await test_remarkable_auth()\n            onedrive_success = await test_onedrive_auth()\n            \n            if auth_success:\n                print(\"\\n\ud83d\udd0d Discovery Tests\")\n                print(\"-\" * 30)\n                session = create_remarkable_session()\n                await test_remarkable_discovery(session)\n            \n            if auth_success and onedrive_success:\n                print(\"\\n\ud83d\udd04 Mixed Mode Tests\")\n                print(\"-\" * 30)\n                await test_mixed_mode_dry_run()\n    \n    except KeyboardInterrupt:\n        print(\"\\n\ud83d\udc4b Test interrupted\")\n    except Exception as e:\n        print(f\"\\n\u274c Unexpected error: {e}\")\n        import traceback\n        traceback.print_exc()",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/test_mixed_mode.py",
      "tags": [
        "async",
        "testing",
        "cloud-integration",
        "remarkable",
        "onedrive",
        "authentication",
        "cli",
        "test-suite",
        "command-line",
        "argparse",
        "discovery",
        "dry-run"
      ],
      "updated_at": "2025-12-07T01:59:48.519131",
      "usage_example": "# Run from command line:\n# Test all functionality:\npython test_mixed_mode.py\n\n# Test authentication only:\npython test_mixed_mode.py --test-auth\n\n# Test reMarkable discovery:\npython test_mixed_mode.py --test-discovery\n\n# Run dry-run mode:\npython test_mixed_mode.py --dry-run\n\n# Or call programmatically:\nimport asyncio\n\nasync def run_tests():\n    await main()\n\nif __name__ == '__main__':\n    asyncio.run(run_tests())"
    },
    {
      "best_practices": [
        "Ensure all required helper functions (scan_output_folder, scan_wuxi2_folder, compare_documents, save_results, print_summary) are properly implemented before calling main()",
        "Verify that OUTPUT_FOLDER and WUXI2_FOLDER paths exist and are accessible before execution",
        "Ensure sufficient disk space is available for writing RESULTS_FILE and DETAILED_JSON outputs",
        "Consider wrapping the main() call in a try-except block to handle potential file I/O errors, permission issues, or missing dependencies",
        "The function assumes specific folder structures and naming conventions - ensure your directories match the expected format",
        "For large document sets, be aware that this function may take significant time to complete and consume considerable memory",
        "Consider adding logging instead of or in addition to print statements for production use",
        "This function has side effects (file I/O, console output) - it's not idempotent and should be used carefully in automated workflows"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 22:16:07",
      "decorators": [],
      "dependencies": [
        "PyPDF2"
      ],
      "description": "Main entry point function that orchestrates a document comparison workflow between two folders (mailsearch/output and wuxi2 repository), detecting signatures and generating comparison results.",
      "docstring": null,
      "id": 1843,
      "imports": [
        "import os",
        "import re",
        "import json",
        "import csv",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Tuple",
        "from typing import Optional",
        "from difflib import SequenceMatcher",
        "import PyPDF2",
        "from collections import defaultdict"
      ],
      "imports_required": [
        "import os",
        "import re",
        "import json",
        "import csv",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict, List, Tuple, Optional",
        "from difflib import SequenceMatcher",
        "import PyPDF2",
        "from collections import defaultdict"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 469,
      "line_start": 448,
      "name": "main_v102",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the primary orchestrator for an enhanced document comparison tool. It coordinates the entire workflow: scanning two document folders, comparing their contents (including signature detection), saving results to files, and displaying a summary. It's designed to identify similarities, differences, and signatures between documents in the OUTPUT_FOLDER and WUXI2_FOLDER directories.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects including printing to console, writing results to files (RESULTS_FILE and DETAILED_JSON), and potentially creating/modifying files in the file system.",
      "settings_required": [
        "OUTPUT_FOLDER constant/variable must be defined in the module scope pointing to the mailsearch/output directory",
        "WUXI2_FOLDER constant/variable must be defined in the module scope pointing to the wuxi2 repository directory",
        "RESULTS_FILE constant/variable must be defined specifying the path for results output",
        "DETAILED_JSON constant/variable must be defined specifying the path for detailed JSON output",
        "scan_output_folder() function must be defined in the same module",
        "scan_wuxi2_folder() function must be defined in the same module",
        "compare_documents() function must be defined in the same module",
        "save_results() function must be defined in the same module",
        "print_summary() function must be defined in the same module",
        "Read permissions required for OUTPUT_FOLDER and WUXI2_FOLDER directories",
        "Write permissions required for the directory where RESULTS_FILE and DETAILED_JSON will be saved"
      ],
      "source_code": "def main():\n    print(\"=\"*80)\n    print(\"Enhanced Document Comparison Tool with Signature Detection\")\n    print(\"Comparing mailsearch/output with wuxi2 repository\")\n    print(\"=\"*80)\n    \n    # Scan folders\n    output_docs = scan_output_folder(OUTPUT_FOLDER)\n    wuxi2_docs = scan_wuxi2_folder(WUXI2_FOLDER)\n    \n    # Compare documents\n    results = compare_documents(output_docs, wuxi2_docs)\n    \n    # Save results\n    save_results(results, RESULTS_FILE, DETAILED_JSON)\n    \n    # Print summary\n    print_summary(results)\n    \n    print(\"\\n\" + \"=\"*80)\n    print(\"Enhanced comparison complete!\")\n    print(\"=\"*80)",
      "source_file": "/tf/active/vicechatdev/mailsearch/enhanced_document_comparison.py",
      "tags": [
        "document-comparison",
        "signature-detection",
        "file-scanning",
        "orchestration",
        "main-entry-point",
        "pdf-processing",
        "batch-processing",
        "reporting",
        "file-analysis"
      ],
      "updated_at": "2025-12-07T01:59:48.510428",
      "usage_example": "# Define required constants and helper functions first\nOUTPUT_FOLDER = './mailsearch/output'\nWUXI2_FOLDER = './wuxi2'\nRESULTS_FILE = './comparison_results.csv'\nDETAILED_JSON = './detailed_results.json'\n\n# Define helper functions (scan_output_folder, scan_wuxi2_folder, etc.)\n# ... (implementation of helper functions)\n\n# Run the main function\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "Ensure all required environment variables (especially OPENAI_API_KEY) are set before calling this function",
        "Verify that the FixedProjectVictoriaGenerator class is properly defined and all its dependencies are satisfied",
        "Handle the returned output_path appropriately, checking if the file exists and is valid",
        "Consider wrapping this function call in try-except blocks to handle potential errors from the pipeline execution",
        "This function has no parameters, so all configuration must be done through the FixedProjectVictoriaGenerator class initialization or environment variables",
        "Ensure sufficient system resources (memory, disk space) are available as disclosure generation may be resource-intensive",
        "Check that ChromaDB is properly initialized and accessible before running",
        "Verify that all required PDF source documents are available if the pipeline processes PDFs"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 08:11:24",
      "decorators": [],
      "dependencies": [
        "os",
        "re",
        "json",
        "tiktoken",
        "typing",
        "datetime",
        "chromadb",
        "langchain_openai",
        "sentence_transformers",
        "fitz",
        "OneCo_hybrid_RAG"
      ],
      "description": "Entry point function that instantiates a FixedProjectVictoriaGenerator and executes its complete pipeline to generate fixed disclosure documents.",
      "docstring": "Main function to run the fixed disclosure generator.",
      "id": 42,
      "imports": [
        "import os",
        "import re",
        "import json",
        "import tiktoken",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Tuple",
        "from datetime import datetime",
        "import chromadb",
        "from langchain_openai import ChatOpenAI",
        "from sentence_transformers import CrossEncoder",
        "import fitz",
        "from OneCo_hybrid_RAG import MyEmbeddingFunction"
      ],
      "imports_required": [
        "import os",
        "import re",
        "import json",
        "import tiktoken",
        "from typing import List, Dict, Any, Tuple",
        "from datetime import datetime",
        "import chromadb",
        "from langchain_openai import ChatOpenAI",
        "from sentence_transformers import CrossEncoder",
        "import fitz",
        "from OneCo_hybrid_RAG import MyEmbeddingFunction"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 1628,
      "line_start": 1624,
      "name": "main_v101",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for running the fixed disclosure generation pipeline. It creates an instance of FixedProjectVictoriaGenerator, executes the complete pipeline workflow, and returns the path to the generated output. This is typically used as the primary execution function for the disclosure generation system, orchestrating the entire process from initialization to completion.",
      "return_annotation": null,
      "return_explained": "Returns a string representing the file path to the generated output from the disclosure generator pipeline. The exact format and location of this path depends on the FixedProjectVictoriaGenerator's run_complete_pipeline() method implementation. This could be a local file path, a relative path, or an absolute path to the generated disclosure document.",
      "settings_required": [
        "FixedProjectVictoriaGenerator class must be defined and available in the same module or imported",
        "OPENAI_API_KEY environment variable (required by langchain_openai.ChatOpenAI)",
        "ChromaDB database configuration and initialization",
        "Access to PDF files if fitz (PyMuPDF) is used for document processing",
        "Sentence transformers model files for CrossEncoder",
        "Custom embedding function from OneCo_hybrid_RAG module must be accessible",
        "Tiktoken encoding files for token counting"
      ],
      "source_code": "def main():\n    \"\"\"Main function to run the fixed disclosure generator.\"\"\"\n    generator = FixedProjectVictoriaGenerator()\n    output_path = generator.run_complete_pipeline()\n    return output_path",
      "source_file": "/tf/active/vicechatdev/fixed_project_victoria_generator.py",
      "tags": [
        "entry-point",
        "pipeline",
        "disclosure-generation",
        "orchestration",
        "main-function",
        "document-generation",
        "RAG",
        "LLM",
        "chromadb",
        "openai"
      ],
      "updated_at": "2025-12-07T01:59:48.509790",
      "usage_example": "# Ensure all environment variables are set\nimport os\nos.environ['OPENAI_API_KEY'] = 'your-api-key-here'\n\n# Import and run the main function\nfrom your_module import main\n\n# Execute the disclosure generation pipeline\noutput_path = main()\nprint(f'Disclosure generated at: {output_path}')"
    },
    {
      "best_practices": [
        "This function is designed as a standalone test and should not be used in production code",
        "The hardcoded folder UUID ('65aabcb0-94de-4e73-bb44-5f1e304c45a5') is specific to a test environment and should be replaced for different use cases",
        "The function performs side effects including creating files, uploading to cloud services, and saving logs",
        "Ensure proper authentication is configured before running this function",
        "The function uses enable_raw_logging=True which may generate large log files",
        "Consider wrapping the function call in proper error handling when integrating into larger test suites",
        "The function relies on test_uploads.py infrastructure, so that module must be properly configured",
        "Network connectivity to reMarkable cloud services is required for successful execution",
        "The function performs a full replica sync which may take time depending on the device's content"
      ],
      "class_interface": {},
      "complexity": "complex",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "always required - imported at function start",
          "import": "from test_uploads import RemarkableUploadTests",
          "optional": false
        },
        {
          "condition": "required for replica sync verification after upload",
          "import": "from local_replica_v2 import RemarkableReplicaBuilder",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:28:43",
      "decorators": [],
      "dependencies": [
        "auth",
        "upload_manager",
        "pathlib",
        "time",
        "test_uploads",
        "local_replica_v2"
      ],
      "description": "Tests uploading a PDF document to a specific folder ('Myfolder') on a reMarkable device and verifies the upload by syncing and checking folder contents.",
      "docstring": null,
      "id": 2049,
      "imports": [
        "from auth import RemarkableAuth",
        "from upload_manager import RemarkableUploadManager",
        "from pathlib import Path",
        "import time",
        "from test_uploads import RemarkableUploadTests",
        "from local_replica_v2 import RemarkableReplicaBuilder"
      ],
      "imports_required": [
        "from auth import RemarkableAuth",
        "from upload_manager import RemarkableUploadManager",
        "from pathlib import Path",
        "import time"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 87,
      "line_start": 8,
      "name": "main_v100",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a comprehensive integration test for the reMarkable upload functionality. It initializes the upload test suite, verifies a target folder exists in the database, creates a test PDF, uploads it to the specified folder, saves HTTP logs, performs a replica sync to verify the upload, and displays the folder contents. It's designed to validate the entire upload-to-folder workflow end-to-end.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the upload to the folder completed successfully and verification passed, False if the folder UUID was not found, upload failed, or an exception occurred during the process. The function may also return None implicitly if the folder verification fails early.",
      "settings_required": [
        "reMarkable device authentication credentials (handled by RemarkableAuth)",
        "Valid reMarkable API session (created by test_uploads.RemarkableUploadTests)",
        "Access to reMarkable cloud services",
        "Write permissions to save test PDFs and log files",
        "The target folder UUID ('65aabcb0-94de-4e73-bb44-5f1e304c45a5') must exist in the reMarkable device database"
      ],
      "source_code": "def main():\n    print(\"\ud83d\udcc1 TESTING UPLOAD TO MYFOLDER\")\n    print(\"=\" * 50)\n    \n    # Initialize the same way as test_uploads.py\n    from test_uploads import RemarkableUploadTests\n    test_suite = RemarkableUploadTests(enable_raw_logging=True)\n    \n    # Myfolder UUID from the logs\n    myfolder_uuid = \"65aabcb0-94de-4e73-bb44-5f1e304c45a5\"\n    \n    print(f\"\ud83c\udfaf Target folder: Myfolder (UUID: {myfolder_uuid})\")\n    \n    # Verify the folder exists in our database\n    if myfolder_uuid not in test_suite.uploader.database['nodes']:\n        print(f\"\u274c Myfolder UUID not found in database\")\n        return False\n    \n    folder_node = test_suite.uploader.database['nodes'][myfolder_uuid]\n    print(f\"\u2705 Found folder: {folder_node['name']}\")\n    print(f\"   Type: {folder_node['node_type']}\")\n    print(f\"   Parent: {folder_node.get('metadata', {}).get('parent', 'root')}\")\n    \n    # Create a test PDF using the same method as test_uploads.py\n    test_pdf_path = test_suite.test_create_test_pdf()\n    \n    # Generate unique name for this test\n    test_name = f\"FolderTest_{int(time.time())}\"\n    \n    print(f\"\ud83d\udcc4 Uploading PDF: {test_name}\")\n    print(f\"   Source file: {test_pdf_path}\")\n    print(f\"   Target folder: Myfolder\")\n    \n    # Upload to the folder using the same method as test_uploads.py\n    try:\n        success = test_suite.uploader.upload_pdf_document(\n            str(test_pdf_path), \n            test_name, \n            parent_uuid=myfolder_uuid\n        )\n        \n        if success:\n            print(f\"\u2705 Upload to folder completed successfully!\")\n            print(f\"\ud83d\udd04 The document should now appear in Myfolder on your device\")\n            \n            # Save raw logs like test_uploads.py does\n            log_file = test_suite.save_raw_logs()\n            if log_file:\n                print(f\"\ud83d\udcdd Raw HTTP logs saved 
to: {log_file}\")\n            \n            # Run a quick sync to verify\n            print(f\"\ud83d\udd04 Running replica sync to verify...\")\n            from local_replica_v2 import RemarkableReplicaBuilder\n            replica_builder = RemarkableReplicaBuilder(test_suite.session)\n            replica_builder.build_complete_replica()\n            \n            # Check if it's in the folder\n            test_suite.uploader._load_database()\n            \n            folder_contents = []\n            for uuid, node in test_suite.uploader.database['nodes'].items():\n                if node.get('metadata', {}).get('parent') == myfolder_uuid:\n                    folder_contents.append({\n                        'name': node['name'],\n                        'uuid': uuid,\n                        'type': node['node_type']\n                    })\n            \n            print(f\"\ud83d\udcc1 Myfolder contents after upload:\")\n            for item in folder_contents:\n                print(f\"   \ud83d\udcc4 {item['name']} ({item['type']}) - {item['uuid'][:8]}...\")\n            \n            return True\n        else:\n            print(f\"\u274c Upload failed\")\n            return False\n            \n    except Exception as e:\n        print(f\"\u274c Upload error: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_folder_upload.py",
      "tags": [
        "testing",
        "remarkable",
        "upload",
        "pdf",
        "folder",
        "integration-test",
        "device-sync",
        "file-management",
        "cloud-storage",
        "verification"
      ],
      "updated_at": "2025-12-07T01:59:48.508976",
      "usage_example": "if __name__ == '__main__':\n    # Run the folder upload test\n    result = main()\n    if result:\n        print('Test passed: Document successfully uploaded to folder')\n    else:\n        print('Test failed: Upload or verification unsuccessful')\n    \n    # The function handles all setup internally:\n    # - Initializes test suite with raw logging\n    # - Creates test PDF\n    # - Uploads to specific folder UUID\n    # - Verifies upload through sync\n    # - Displays folder contents"
    },
    {
      "best_practices": [
        "Ensure all required constants (CLIENT_ID, TENANT_ID, SENDER_EMAIL, KEYWORD, DOWNLOAD_DIR, GRAPH_SCOPE) are defined before calling this function",
        "All helper functions (ensure_download_dir, get_msal_app, get_access_token, search_messages, download_attachments_for_message) must be implemented and available in scope",
        "The Azure AD application must have appropriate permissions granted and admin consent provided for Mail.Read or Mail.ReadWrite",
        "Store sensitive credentials (CLIENT_ID, TENANT_ID, client secrets) in environment variables or secure configuration, not hardcoded",
        "Consider adding error handling around API calls and file operations for production use",
        "The function prints to console - consider using logging module for better control in production environments",
        "Ensure sufficient disk space is available in DOWNLOAD_DIR before running",
        "Be aware of API rate limits when processing large numbers of messages",
        "This function is designed to be called as a script entry point, typically wrapped in 'if __name__ == \"__main__\"' block"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 22:26:19",
      "decorators": [],
      "dependencies": [
        "os",
        "base64",
        "requests",
        "msal",
        "pathlib"
      ],
      "description": "Main entry point function that authenticates with Microsoft Graph API, searches for emails from a specific sender containing a keyword, and downloads all attachments from matching messages to a local directory.",
      "docstring": null,
      "id": 1874,
      "imports": [
        "import os",
        "import base64",
        "import requests",
        "import msal",
        "from pathlib import Path"
      ],
      "imports_required": [
        "import os",
        "import base64",
        "import requests",
        "import msal",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 158,
      "line_start": 144,
      "name": "main_v99",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function orchestrates an automated email attachment download workflow. It authenticates using MSAL (Microsoft Authentication Library), queries Microsoft Graph API for emails matching specific criteria (sender and keyword), and downloads all attachments from the found messages. This is useful for automated document retrieval, backup systems, or processing incoming attachments from specific sources.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects including: printing search results and progress to console, creating directories, and downloading files to the filesystem.",
      "settings_required": [
        "DOWNLOAD_DIR constant/variable - path where attachments will be saved",
        "CLIENT_ID constant/variable - Azure AD application client ID",
        "TENANT_ID constant/variable - Azure AD tenant ID",
        "GRAPH_SCOPE constant/variable - Microsoft Graph API scope (typically 'https://graph.microsoft.com/.default' or 'Mail.Read')",
        "SENDER_EMAIL constant/variable - email address to filter messages by sender",
        "KEYWORD constant/variable - search keyword to filter messages",
        "ensure_download_dir() function must be defined - creates download directory if it doesn't exist",
        "get_msal_app() function must be defined - initializes MSAL application",
        "get_access_token() function must be defined - obtains OAuth access token",
        "search_messages() function must be defined - searches for messages via Graph API",
        "download_attachments_for_message() function must be defined - downloads attachments for a specific message",
        "Azure AD app registration with appropriate Microsoft Graph API permissions (Mail.Read or Mail.ReadWrite)"
      ],
      "source_code": "def main():\n    ensure_download_dir(DOWNLOAD_DIR)\n\n    app = get_msal_app(CLIENT_ID, TENANT_ID)\n    token = get_access_token(app, GRAPH_SCOPE)\n\n    print(f\"Searching for messages from {SENDER_EMAIL} containing '{KEYWORD}'...\")\n    messages = search_messages(token, SENDER_EMAIL, KEYWORD)\n    print(f\"Found {len(messages)} messages.\")\n\n    for msg in messages:\n        subject = msg.get(\"subject\", \"(no subject)\")\n        msg_id = msg.get(\"id\")\n        print(f\"\\nMessage: {subject}\")\n        download_attachments_for_message(token, msg_id, DOWNLOAD_DIR)",
      "source_file": "/tf/active/vicechatdev/mailsearch/example_script.py",
      "tags": [
        "email-automation",
        "microsoft-graph",
        "attachment-download",
        "msal",
        "oauth",
        "azure-ad",
        "email-search",
        "file-download",
        "orchestration",
        "main-entry-point"
      ],
      "updated_at": "2025-12-07T01:59:48.508303",
      "usage_example": "# Define required constants and helper functions first\nDOWNLOAD_DIR = './downloads'\nCLIENT_ID = 'your-client-id-here'\nTENANT_ID = 'your-tenant-id-here'\nGRAPH_SCOPE = ['https://graph.microsoft.com/.default']\nSENDER_EMAIL = 'sender@example.com'\nKEYWORD = 'invoice'\n\n# Helper functions (simplified examples)\ndef ensure_download_dir(path):\n    Path(path).mkdir(parents=True, exist_ok=True)\n\ndef get_msal_app(client_id, tenant_id):\n    authority = f'https://login.microsoftonline.com/{tenant_id}'\n    return msal.ConfidentialClientApplication(client_id, authority=authority, client_credential='secret')\n\ndef get_access_token(app, scopes):\n    result = app.acquire_token_for_client(scopes=scopes)\n    return result['access_token']\n\ndef search_messages(token, sender, keyword):\n    # Implementation to search messages\n    return []\n\ndef download_attachments_for_message(token, msg_id, download_dir):\n    # Implementation to download attachments\n    pass\n\n# Run the main function\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "SECURITY WARNING: Credentials are hardcoded in the source code. Use environment variables or secure credential management instead.",
        "The function depends on external functions 'has_wuxi_coding()' and 'upload_file_to_filecloud()' which must be defined in the same module.",
        "Use --dry-run flag first to verify which files will be uploaded before performing actual uploads.",
        "Ensure the source directory exists and contains files matching the pattern before running.",
        "The function uses a persistent session object for FileCloud API calls to maintain authentication.",
        "Error handling is implemented per-file, so one failure won't stop the entire batch.",
        "The timezone is set to Europe/Brussels (CET) - adjust if needed for different regions.",
        "Consider implementing retry logic for network failures in production use.",
        "The function prints progress to stdout - redirect or capture if logging to file is needed."
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 22:17:53",
      "decorators": [],
      "dependencies": [
        "argparse",
        "pathlib",
        "requests",
        "xmltodict",
        "datetime",
        "zoneinfo",
        "os",
        "re"
      ],
      "description": "Command-line application that uploads PDF files without WUXI coding from a local directory to a FileCloud server, with support for dry-run mode and customizable file patterns.",
      "docstring": null,
      "id": 1849,
      "imports": [
        "import os",
        "import re",
        "import requests",
        "import xmltodict",
        "from datetime import datetime",
        "from zoneinfo import ZoneInfo",
        "import argparse",
        "from pathlib import Path"
      ],
      "imports_required": [
        "import argparse",
        "from pathlib import Path",
        "import requests",
        "import xmltodict",
        "from datetime import datetime",
        "from zoneinfo import ZoneInfo",
        "import os",
        "import re"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 246,
      "line_start": 157,
      "name": "main_v98",
      "parameters": [],
      "parameters_explained": {
        "No direct parameters": "This function takes no parameters directly. Instead, it uses argparse to parse command-line arguments: --source (source directory path, default './output'), --target (FileCloud target folder path), --dry-run (boolean flag for simulation mode), and --pattern (file glob pattern, default '*.pdf')"
      },
      "parent_class": null,
      "purpose": "This is the main entry point for a file upload utility that filters PDF files based on WUXI coding patterns and uploads them to a specific FileCloud location. It's designed for document management workflows where files need to be categorized and uploaded to a shared cloud storage system. The function handles authentication, file filtering, batch uploading, and provides detailed progress reporting with success/error summaries.",
      "return_annotation": null,
      "return_explained": "Returns None implicitly. The function performs side effects (file uploads, console output) and exits normally. Early returns occur when no files are found or in dry-run mode.",
      "settings_required": [
        "FileCloud server URL: 'https://filecloud.vicebio.com/' (hardcoded)",
        "FileCloud credentials: userid='wim@vicebio.com', password='Studico01!' (hardcoded - security concern)",
        "Access to Europe/Brussels timezone data (system timezone database)",
        "Network access to FileCloud server",
        "Functions 'has_wuxi_coding()' and 'upload_file_to_filecloud()' must be defined in the same module",
        "Read permissions on source directory (default: ./output)",
        "Write permissions on FileCloud target directory"
      ],
      "source_code": "def main():\n    parser = argparse.ArgumentParser(\n        description=\"Upload non-WUXI coded files from output folder to FileCloud\"\n    )\n    \n    parser.add_argument(\n        '--source',\n        default='./output',\n        help='Source directory (default: ./output)'\n    )\n    \n    parser.add_argument(\n        '--target',\n        default='/SHARED/vicebio_shares/03_CMC/e-sign - document to approve/Extract docusign - not Wuxi coded',\n        help='Target folder in FileCloud'\n    )\n    \n    parser.add_argument(\n        '--dry-run',\n        action='store_true',\n        help='Show what would be uploaded without actually uploading'\n    )\n    \n    parser.add_argument(\n        '--pattern',\n        default='*.pdf',\n        help='File pattern to match (default: *.pdf)'\n    )\n    \n    args = parser.parse_args()\n    \n    # Setup timezone\n    cet_timezone = ZoneInfo(\"Europe/Brussels\")\n    \n    # Find all PDF files without WUXI coding\n    source_path = Path(args.source)\n    all_files = list(source_path.glob(args.pattern))\n    non_wuxi_files = [f for f in all_files if not has_wuxi_coding(f.name)]\n    \n    print(f\"Found {len(all_files)} total files\")\n    print(f\"Filtered to {len(non_wuxi_files)} files without WUXI coding\")\n    print(f\"Target folder: {args.target}\")\n    print(\"=\" * 80)\n    \n    if not non_wuxi_files:\n        print(\"No files to upload\")\n        return\n    \n    if args.dry_run:\n        print(\"\\nDRY RUN MODE - No files will be uploaded\")\n        print(\"=\" * 80)\n        for file_path in sorted(non_wuxi_files):\n            print(f\"\\n{file_path.name}\")\n            print(f\"  \u2192 Would upload to: {args.target}/{file_path.name}\")\n        return\n    \n    # Login to FileCloud\n    print(\"\\nLogging in to FileCloud...\")\n    Headers = {'Accept': 'application/json'}\n    Creds = {'userid': 'wim@vicebio.com', 'password': 'Studico01!'}\n    ServerURL = 
'https://filecloud.vicebio.com/'\n    LoginEndPoint = 'core/loginguest'\n    \n    s = requests.session()\n    LoginCall = s.post(ServerURL + LoginEndPoint, data=Creds, headers=Headers).json()\n    print(\"\u2713 Logged in successfully\")\n    print(\"=\" * 80)\n    \n    # Upload files\n    success_count = 0\n    error_count = 0\n    \n    for file_path in sorted(non_wuxi_files):\n        try:\n            if upload_file_to_filecloud(str(file_path), args.target, s, cet_timezone, args.dry_run):\n                success_count += 1\n            else:\n                error_count += 1\n        except Exception as e:\n            print(f\"\\n{file_path.name}\")\n            print(f\"  \u2717 Error: {e}\")\n            error_count += 1\n    \n    # Summary\n    print(\"\\n\" + \"=\" * 80)\n    print(\"SUMMARY\")\n    print(\"=\" * 80)\n    print(f\"Total files: {len(non_wuxi_files)}\")\n    print(f\"Successful: {success_count}\")\n    print(f\"Errors: {error_count}\")",
      "source_file": "/tf/active/vicechatdev/mailsearch/upload_non_wuxi_coded.py",
      "tags": [
        "file-upload",
        "filecloud",
        "cli",
        "batch-processing",
        "pdf-management",
        "document-management",
        "file-filtering",
        "cloud-storage",
        "argparse",
        "dry-run",
        "authentication",
        "session-management"
      ],
      "updated_at": "2025-12-07T01:59:48.507523",
      "usage_example": "# Run with default settings\nif __name__ == '__main__':\n    main()\n\n# Command-line usage examples:\n# python script.py\n# python script.py --source ./my_pdfs --pattern '*.pdf'\n# python script.py --dry-run\n# python script.py --target '/SHARED/custom_folder' --source ./docs\n# python script.py --pattern '*.docx' --dry-run"
    },
    {
      "best_practices": [
        "This function must be run from the email-forwarder project root directory containing requirements.txt and src/",
        "Requires helper functions 'run_command' and 'check_file_exists' to be defined in the same module",
        "The function should be run twice: once to create the venv, then again after activation to install dependencies",
        "Always activate the virtual environment before running the second time to ensure dependencies are installed in the correct location",
        "Review and configure the .env file with actual MS365 credentials before starting the application",
        "The function uses interactive prompts (input()) so it's not suitable for automated/non-interactive environments",
        "Uses exec() to test imports which can be a security concern - ensure the test_script content is trusted",
        "The function assumes Unix-like commands (cp) which may not work on Windows without modification",
        "Check return value to determine if setup was successful before proceeding with application startup"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only during import testing phase (Step 6), requires virtual environment to be active and dependencies installed",
          "import": "from forwarder.smtp_server import SMTPServer",
          "optional": false
        },
        {
          "condition": "only during import testing phase (Step 6), requires virtual environment to be active and dependencies installed",
          "import": "from forwarder.o365_client import O365Client",
          "optional": false
        },
        {
          "condition": "only during import testing phase (Step 6), requires virtual environment to be active and dependencies installed",
          "import": "from forwarder.email_handler import EmailHandler",
          "optional": false
        },
        {
          "condition": "only during import testing phase (Step 6), requires virtual environment to be active and dependencies installed",
          "import": "from config.settings import Settings",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 17:35:43",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "os",
        "sys",
        "subprocess",
        "shutil"
      ],
      "description": "Interactive setup script that configures a Python virtual environment for an email forwarder application, installs dependencies, and verifies the installation.",
      "docstring": null,
      "id": 1473,
      "imports": [
        "import os",
        "import sys",
        "import subprocess",
        "import shutil",
        "from pathlib import Path"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "import subprocess",
        "import shutil",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 173,
      "line_start": 36,
      "name": "main_v97",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function orchestrates the complete setup process for an email forwarder application. It performs directory validation, checks Python/pip installation, creates or recreates a virtual environment, provides activation instructions, installs dependencies from requirements.txt, verifies configuration files (.env), tests Python imports, and displays final usage instructions. It's designed to be run as a standalone setup script to prepare the development/production environment.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the setup process completes successfully (all checks pass and dependencies are installed), False if any critical step fails (missing Python, wrong directory, failed dependency installation, or import errors). The return value indicates whether the environment is ready for use.",
      "settings_required": [
        "Must be run from the email-forwarder project root directory",
        "Requires requirements.txt file in the current directory",
        "Requires src/ directory in the current directory",
        "Python 3.8 or higher must be installed and accessible as 'python3'",
        "pip3 must be installed and accessible",
        ".env.example file should exist for configuration template",
        ".env file with MS365 credentials: TENANT_ID, CLIENT_ID, CLIENT_SECRET, FROM_EMAIL",
        "Virtual environment should be activated for full setup (VIRTUAL_ENV environment variable)",
        "Depends on external function 'run_command' for executing shell commands",
        "Depends on external function 'check_file_exists' for file validation"
      ],
      "source_code": "def main():\n    print(\"=\" * 60)\n    print(\"\ud83d\udce7 EMAIL FORWARDER - VIRTUAL ENVIRONMENT SETUP\")\n    print(\"=\" * 60)\n    print()\n    \n    # Check current directory\n    current_dir = Path.cwd()\n    print(f\"\ud83d\udcc1 Current directory: {current_dir}\")\n    \n    # Verify we're in the right directory\n    if not (Path(\"requirements.txt\").exists() and Path(\"src\").exists()):\n        print(\"\u274c This script must be run from the email-forwarder directory\")\n        print(\"   Required files: requirements.txt, src/ directory\")\n        return False\n    \n    print(\"\u2705 Project directory structure verified\")\n    print()\n    \n    # Step 1: Check Python\n    print(\"\ud83d\udc0d STEP 1: Checking Python installation\")\n    if not run_command(\"python3 --version\", \"Checking Python 3\"):\n        print(\"\u274c Python 3 is required. Please install Python 3.8 or higher.\")\n        return False\n    \n    if not run_command(\"pip3 --version\", \"Checking pip3\"):\n        print(\"\u274c pip3 is required. Please install pip3.\")\n        return False\n    \n    print()\n    \n    # Step 2: Virtual Environment Setup\n    print(\"\ud83c\udf10 STEP 2: Setting up virtual environment\")\n    \n    venv_path = Path(\"venv\")\n    if venv_path.exists():\n        print(\"\u26a0\ufe0f  Virtual environment already exists\")\n        response = input(\"Do you want to recreate it? 
(y/N): \").strip().lower()\n        if response == 'y':\n            shutil.rmtree(venv_path)\n            print(\"\ud83d\uddd1\ufe0f  Removed existing virtual environment\")\n    \n    if not venv_path.exists():\n        if not run_command(\"python3 -m venv venv\", \"Creating virtual environment\"):\n            return False\n    \n    print()\n    \n    # Step 3: Activation Instructions\n    print(\"\ud83d\udd27 STEP 3: Virtual environment activation\")\n    print(\"   To activate the virtual environment, run:\")\n    print(\"   \ud83d\udccb Linux/Mac: source venv/bin/activate\")\n    print(\"   \ud83d\udccb Windows:   venv\\\\Scripts\\\\activate\")\n    print()\n    \n    # Step 4: Check if we can detect activation\n    virtual_env = os.environ.get('VIRTUAL_ENV')\n    if virtual_env:\n        print(f\"\u2705 Virtual environment is active: {virtual_env}\")\n        \n        # Install dependencies\n        print(\"\ud83d\udce6 STEP 4: Installing dependencies\")\n        if run_command(\"pip install --upgrade pip\", \"Upgrading pip\"):\n            if run_command(\"pip install -r requirements.txt\", \"Installing requirements\"):\n                print(\"\u2705 Dependencies installed successfully\")\n            else:\n                print(\"\u274c Failed to install dependencies\")\n                return False\n        else:\n            print(\"\u26a0\ufe0f  pip upgrade failed, but continuing...\")\n            if not run_command(\"pip install -r requirements.txt\", \"Installing requirements\"):\n                print(\"\u274c Failed to install dependencies\")\n                return False\n        \n        print()\n        \n        # Step 5: Configuration check\n        print(\"\u2699\ufe0f  STEP 5: Configuration verification\")\n        \n        if not check_file_exists(\".env\", \".env configuration file\"):\n            if check_file_exists(\".env.example\", \".env.example template\"):\n                print(\"\ud83d\udccb Copy .env.example to .env and 
configure with your MS365 credentials:\")\n                print(\"   - TENANT_ID=your_tenant_id\")\n                print(\"   - CLIENT_ID=your_client_id\") \n                print(\"   - CLIENT_SECRET=your_client_secret\")\n                print(\"   - FROM_EMAIL=your_sender@domain.com\")\n                run_command(\"cp .env.example .env\", \"Copying configuration template\")\n        \n        print()\n        \n        # Step 6: Import test\n        print(\"\ud83e\uddea STEP 6: Testing Python imports\")\n        test_script = \"\"\"\nimport sys\nsys.path.insert(0, 'src')\ntry:\n    from forwarder.smtp_server import SMTPServer\n    from forwarder.o365_client import O365Client\n    from forwarder.email_handler import EmailHandler\n    from config.settings import Settings\n    print(\"\u2705 All imports successful\")\nexcept ImportError as e:\n    print(f\"\u274c Import error: {e}\")\n    sys.exit(1)\n\"\"\"\n        \n        try:\n            exec(test_script)\n        except SystemExit:\n            print(\"\u274c Import test failed\")\n            return False\n        \n        print()\n        \n        # Step 7: Final instructions\n        print(\"\ud83d\ude80 SETUP COMPLETE!\")\n        print(\"=\" * 40)\n        print(\"To start the email forwarder:\")\n        print(\"1. Ensure virtual environment is active:\")\n        print(\"   source venv/bin/activate\")\n        print()\n        print(\"2. Configure .env file with your MS365 credentials\")\n        print()\n        print(\"3. Start the service:\")\n        print(\"   python src/main.py\")\n        print()\n        print(\"4. Test the service:\")\n        print(\"   python send_test_email.py\")\n        print()\n        print(\"5. 
Stop with Ctrl+C when done\")\n        print(\"=\" * 40)\n        \n    else:\n        print(\"\u26a0\ufe0f  Virtual environment is not currently active\")\n        print(\"   Please activate it first with: source venv/bin/activate\")\n        print(\"   Then run this script again to complete the setup\")\n    \n    return True",
      "source_file": "/tf/active/vicechatdev/email-forwarder/setup_venv.py",
      "tags": [
        "setup",
        "installation",
        "virtual-environment",
        "venv",
        "dependency-management",
        "configuration",
        "email-forwarder",
        "interactive-setup",
        "environment-setup",
        "python-setup",
        "pip",
        "validation",
        "initialization"
      ],
      "updated_at": "2025-12-07T01:59:48.506764",
      "usage_example": "# This function is typically called as the entry point of a setup script\n# Run from the email-forwarder project directory:\n\nif __name__ == '__main__':\n    success = main()\n    if success:\n        print('Setup completed successfully')\n        sys.exit(0)\n    else:\n        print('Setup failed')\n        sys.exit(1)\n\n# Expected workflow:\n# 1. Navigate to email-forwarder directory\n# 2. Run: python setup.py\n# 3. If venv not active, activate it: source venv/bin/activate\n# 4. Run setup.py again to complete dependency installation\n# 5. Configure .env file with MS365 credentials\n# 6. Start the application: python src/main.py"
    },
    {
      "best_practices": [
        "This function requires three helper functions to be defined in the same module: check_service_process(), check_port_listening(), and test_smtp_basic()",
        "The function performs checks sequentially and returns False immediately upon first failure, implementing fail-fast behavior",
        "Ensure the email forwarder service (src/main.py) is running before calling this function",
        "The function modifies sys.path to import configuration, which may affect module resolution in the calling context",
        "Use this function as part of deployment verification or continuous health monitoring",
        "The function provides user-friendly console output with visual indicators (\u2713/\u2717) for easy interpretation",
        "Exit codes should be used when calling from command line: exit(0) for success, exit(1) for failure",
        "Consider wrapping this in a try-except block if using in automated monitoring to handle unexpected exceptions",
        "The function assumes port 2525 is the SMTP listening port; modify if using a different port"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported dynamically during configuration check (Check 4), requires src directory in path",
          "import": "import config.settings as settings",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 17:33:22",
      "decorators": [],
      "dependencies": [
        "subprocess",
        "sys",
        "os",
        "socket",
        "time",
        "smtplib"
      ],
      "description": "Performs a comprehensive status check of an email forwarder service, verifying process status, port availability, SMTP communication, and configuration settings.",
      "docstring": null,
      "id": 1465,
      "imports": [
        "import subprocess",
        "import sys",
        "import os",
        "import socket",
        "import time",
        "import smtplib",
        "import config.settings as settings"
      ],
      "imports_required": [
        "import subprocess",
        "import sys",
        "import os",
        "import socket",
        "import time",
        "import smtplib"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 101,
      "line_start": 44,
      "name": "main_v96",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a diagnostic and verification tool for an email forwarder service that accepts SMTP connections and forwards emails via Microsoft 365 Graph API. It systematically checks four critical aspects: (1) whether the service process is running, (2) if port 2525 is listening for connections, (3) SMTP protocol communication functionality, and (4) configuration validity. The function provides detailed console output with visual indicators and usage instructions, making it suitable for deployment verification, troubleshooting, and health monitoring.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if all four checks (service process, port listening, SMTP communication, and configuration) pass successfully, indicating the service is fully operational; False if any check fails, indicating the service has issues that need attention.",
      "settings_required": [
        "config.settings module must exist in src/config/ directory with the following attributes: SMTP_LISTEN_HOST, SMTP_LISTEN_PORT, MS365_SENDER_EMAIL, VALIDATE_RECIPIENTS",
        "Helper functions must be defined: check_service_process(), check_port_listening(), test_smtp_basic()",
        "Email forwarder service should be running as a separate process (src/main.py)",
        "SMTP port 2525 must be configured and accessible on 127.0.0.1",
        "Microsoft 365 Graph API credentials must be configured in settings"
      ],
      "source_code": "def main():\n    print(\"=\" * 60)\n    print(\"EMAIL FORWARDER SERVICE - FINAL STATUS CHECK\")\n    print(\"=\" * 60)\n    \n    # Check 1: Process running\n    print(\"\\n1. Checking if service process is running...\")\n    is_running, process_info = check_service_process()\n    if is_running:\n        print(f\"   \u2713 Service is running: {process_info}\")\n    else:\n        print(\"   \u2717 Service process not found\")\n        return False\n    \n    # Check 2: Port listening\n    print(\"\\n2. Checking if SMTP port 2525 is listening...\")\n    if check_port_listening():\n        print(\"   \u2713 Port 2525 is accepting connections\")\n    else:\n        print(\"   \u2717 Port 2525 is not accessible\")\n        return False\n    \n    # Check 3: SMTP communication\n    print(\"\\n3. Testing SMTP protocol communication...\")\n    smtp_result = test_smtp_basic()\n    if smtp_result is True:\n        print(\"   \u2713 SMTP protocol communication successful\")\n    else:\n        print(f\"   \u2717 SMTP communication failed: {smtp_result}\")\n        return False\n    \n    # Check 4: Configuration\n    print(\"\\n4. 
Checking configuration...\")\n    try:\n        sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))\n        import config.settings as settings\n        print(f\"   \u2713 SMTP Listen: {settings.SMTP_LISTEN_HOST}:{settings.SMTP_LISTEN_PORT}\")\n        print(f\"   \u2713 MS365 Sender: {settings.MS365_SENDER_EMAIL}\")\n        print(f\"   \u2713 Validation: {settings.VALIDATE_RECIPIENTS}\")\n    except Exception as e:\n        print(f\"   \u2717 Configuration error: {e}\")\n        return False\n    \n    print(\"\\n\" + \"=\" * 60)\n    print(\"SERVICE STATUS: \u2713 FULLY OPERATIONAL\")\n    print(\"=\" * 60)\n    print(\"\\nThe email forwarder service is successfully running and ready to:\")\n    print(\"\u2022 Accept SMTP connections on 127.0.0.1:2525\")\n    print(\"\u2022 Parse incoming email messages\")\n    print(\"\u2022 Forward emails via Microsoft 365 Graph API\")\n    print(\"\u2022 Handle rate limiting and retries\")\n    print(\"\u2022 Log all activities\")\n    print(\"\\nTo send test emails, use:\")\n    print(\"  python send_test_email.py --to recipient@domain.com --from sender@domain.com\")\n    print(\"\\nTo stop the service:\")\n    print(\"  pkill -f 'python src/main.py'\")\n    \n    return True",
      "source_file": "/tf/active/vicechatdev/email-forwarder/service_status.py",
      "tags": [
        "service-monitoring",
        "health-check",
        "smtp",
        "email-forwarder",
        "diagnostic",
        "status-check",
        "verification",
        "microsoft-365",
        "port-check",
        "process-check",
        "configuration-validation"
      ],
      "updated_at": "2025-12-07T01:59:48.505999",
      "usage_example": "# Assuming this is in a file called check_status.py with required helper functions\n# and the email forwarder service is running\n\nif __name__ == '__main__':\n    # Run the comprehensive status check\n    success = main()\n    \n    if success:\n        print(\"\\nAll systems operational!\")\n        exit(0)\n    else:\n        print(\"\\nService has issues that need attention.\")\n        exit(1)\n\n# Expected output when successful:\n# ============================================================\n# EMAIL FORWARDER SERVICE - FINAL STATUS CHECK\n# ============================================================\n# \n# 1. Checking if service process is running...\n#    \u2713 Service is running: PID 12345\n# \n# 2. Checking if SMTP port 2525 is listening...\n#    \u2713 Port 2525 is accepting connections\n# \n# 3. Testing SMTP protocol communication...\n#    \u2713 SMTP protocol communication successful\n# \n# 4. Checking configuration...\n#    \u2713 SMTP Listen: 127.0.0.1:2525\n#    \u2713 MS365 Sender: sender@domain.com\n#    \u2713 Validation: True\n# \n# ============================================================\n# SERVICE STATUS: \u2713 FULLY OPERATIONAL\n# ============================================================"
    },
    {
      "best_practices": [
        "Ensure the CSV file path is updated from 'your_dataset.csv' to your actual dataset location before running",
        "The load_dataset() function must be defined or imported before calling main()",
        "Dataset must contain all required columns: 'weight_gain', 'feed_conversion_ratio', 'mortality_rate', 'eimeria_infection', 'treatment', 'challenge_regimen'",
        "The 'eimeria_infection' column should be binary (0 = No, 1 = Yes) for proper visualization",
        "Run in an environment that supports matplotlib plot display (Jupyter notebook, IDE with plot support, or with appropriate backend configured)",
        "Consider adding plt.close() calls after plt.show() to prevent memory issues with multiple plots",
        "For large datasets, consider adding data sampling or limiting the number of visualizations",
        "The function performs multiple statistical tests; consider adjusting for multiple comparisons if using results for publication",
        "Missing values are reported but not automatically handled; consider preprocessing data before analysis"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 13:11:36",
      "decorators": [],
      "dependencies": [
        "pandas",
        "numpy",
        "seaborn",
        "matplotlib",
        "scipy",
        "os"
      ],
      "description": "Performs comprehensive exploratory data analysis on a broiler chicken performance dataset, analyzing the correlation between Eimeria infection and performance measures (weight gain, feed conversion ratio, mortality rate) across different treatments and challenge regimens.",
      "docstring": null,
      "id": 809,
      "imports": [
        "import pandas as pd",
        "import numpy as np",
        "import seaborn as sns",
        "import matplotlib.pyplot as plt",
        "from scipy.stats import pearsonr",
        "import os"
      ],
      "imports_required": [
        "import pandas as pd",
        "import numpy as np",
        "import seaborn as sns",
        "import matplotlib.pyplot as plt",
        "from scipy.stats import pearsonr",
        "import os"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 103,
      "line_start": 22,
      "name": "main_v95",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a complete data analysis pipeline for veterinary/agricultural research data. It loads a CSV dataset, validates required columns, generates descriptive statistics, performs correlation analysis between Eimeria infection and performance metrics, and creates multiple visualizations (histograms, boxplots) to explore relationships between infection status, treatments, challenge regimens, and performance outcomes. The function is designed for exploratory data analysis in poultry health research.",
      "return_annotation": null,
      "return_explained": "Returns None. The function performs side effects including printing analysis results to console and displaying multiple matplotlib/seaborn visualizations. It may return early (None) if the dataset fails to load or if required columns are missing.",
      "settings_required": [
        "A CSV file named 'your_dataset.csv' (or custom path) containing columns: 'weight_gain', 'feed_conversion_ratio', 'mortality_rate', 'eimeria_infection', 'treatment', 'challenge_regimen'",
        "The load_dataset() function must be defined in the same module or imported",
        "Display environment capable of showing matplotlib plots (GUI backend or Jupyter notebook)"
      ],
      "source_code": "def main():\n    # Load the dataset\n    file_path = 'your_dataset.csv'  # Replace with the path to your dataset\n    data = load_dataset(file_path)\n    \n    if data is None:\n        return\n\n    # Display the first few rows of the dataset\n    print(\"Dataset Preview:\")\n    print(data.head())\n\n    # Check if required columns exist\n    required_columns = ['weight_gain', 'feed_conversion_ratio', 'mortality_rate', 'eimeria_infection', 'treatment', 'challenge_regimen']\n    missing_columns = [col for col in required_columns if col not in data.columns]\n    if missing_columns:\n        print(f\"Error: Missing columns in the dataset: {missing_columns}\")\n        return\n\n    # Descriptive statistics for performance measures\n    print(\"\\nDescriptive Statistics:\")\n    performance_measures = ['weight_gain', 'feed_conversion_ratio', 'mortality_rate']\n    print(data[performance_measures].describe())\n\n    # Check for missing values\n    print(\"\\nMissing Values:\")\n    print(data.isnull().sum())\n\n    # Visualize the distribution of performance measures\n    for measure in performance_measures:\n        plt.figure(figsize=(8, 4))\n        sns.histplot(data[measure].dropna(), kde=True)\n        plt.title(f'Distribution of {measure}')\n        plt.xlabel(measure)\n        plt.ylabel('Frequency')\n        plt.show()\n\n    # Correlation analysis between Eimeria infection and performance measures\n    correlations = {}\n    for measure in performance_measures:\n        if data['eimeria_infection'].isnull().any() or data[measure].isnull().any():\n            print(f\"Warning: Missing data for correlation analysis with {measure}.\")\n            continue\n        corr, p_value = pearsonr(data['eimeria_infection'], data[measure])\n        correlations[measure] = {'correlation': corr, 'p_value': p_value}\n\n    # Display correlation results\n    print(\"\\nCorrelation Analysis:\")\n    for measure, stats in correlations.items():\n        
print(f\"{measure}: Correlation = {stats['correlation']:.2f}, p-value = {stats['p_value']:.4f}\")\n\n    # Visualize the relationship between Eimeria infection and performance measures\n    for measure in performance_measures:\n        plt.figure(figsize=(8, 4))\n        sns.boxplot(x='eimeria_infection', y=measure, data=data)\n        plt.title(f'{measure} by Eimeria Infection Status')\n        plt.xlabel('Eimeria Infection (0 = No, 1 = Yes)')\n        plt.ylabel(measure)\n        plt.show()\n\n    # Grouping by treatment and challenge regimen\n    grouped_data = data.groupby(['treatment', 'challenge_regimen'])\n\n    # Descriptive statistics by group\n    print(\"\\nDescriptive Statistics by Treatment and Challenge Regimen:\")\n    for name, group in grouped_data:\n        print(f\"\\nGroup: {name}\")\n        print(group[performance_measures].describe())\n\n    # Visualize performance measures by treatment and challenge regimen\n    for measure in performance_measures:\n        plt.figure(figsize=(12, 6))\n        sns.boxplot(x='treatment', y=measure, hue='challenge_regimen', data=data)\n        plt.title(f'{measure} by Treatment and Challenge Regimen')\n        plt.xlabel('Treatment')\n        plt.ylabel(measure)\n        plt.legend(title='Challenge Regimen')\n        plt.show()\n\n    # Conclusion\n    print(\"\\nConclusion:\")\n    print(\"The analysis provides descriptive statistics and visualizations to explore the correlation between Eimeria infection and performance measures in broilers. Further statistical tests may be required to draw definitive conclusions.\")",
      "source_file": "/tf/active/vicechatdev/vice_ai/smartstat_scripts/343f5578-64e0-4101-84bd-5824b3c15deb/project_1/analysis.py",
      "tags": [
        "data-analysis",
        "exploratory-data-analysis",
        "veterinary-research",
        "poultry-health",
        "correlation-analysis",
        "data-visualization",
        "statistical-analysis",
        "eimeria-infection",
        "broiler-performance",
        "pandas",
        "seaborn",
        "matplotlib",
        "descriptive-statistics"
      ],
      "updated_at": "2025-12-07T01:59:48.505269",
      "usage_example": "# Ensure load_dataset function is defined\ndef load_dataset(file_path):\n    try:\n        return pd.read_csv(file_path)\n    except Exception as e:\n        print(f'Error loading dataset: {e}')\n        return None\n\n# Prepare your dataset CSV with required columns:\n# weight_gain, feed_conversion_ratio, mortality_rate, eimeria_infection, treatment, challenge_regimen\n\n# Update the file_path variable in the function or modify the CSV filename\n# Then call the function\nmain()\n\n# The function will:\n# 1. Load 'your_dataset.csv'\n# 2. Display dataset preview and statistics\n# 3. Show distribution plots for performance measures\n# 4. Perform correlation analysis with Eimeria infection\n# 5. Generate boxplots comparing groups by treatment and challenge regimen"
    },
    {
      "best_practices": [
        "Ensure DocumentComparator class is properly defined and imported before calling this function",
        "This function is designed to be called as a script entry point, typically within an if __name__ == '__main__' block",
        "The function catches all exceptions broadly, so specific error details may be lost - consider logging for production use",
        "Return value can be used for exit code determination in CLI applications",
        "Console output uses emoji characters - ensure terminal supports UTF-8 encoding"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:54:40",
      "decorators": [],
      "dependencies": [
        "json",
        "auth"
      ],
      "description": "Entry point function that compares real versus uploaded documents using DocumentComparator and displays the comparison results with formatted output.",
      "docstring": "Compare real vs uploaded documents",
      "id": 2118,
      "imports": [
        "import json",
        "from auth import RemarkableAuth"
      ],
      "imports_required": [
        "import json",
        "from auth import RemarkableAuth"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 256,
      "line_start": 237,
      "name": "main_v94",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main execution entry point for a document comparison utility. It instantiates a DocumentComparator object, performs a comparison between real and uploaded documents, and provides user-friendly console output with emojis indicating success or failure. The function is designed to help understand differences in document visibility or move behavior between real and uploaded versions.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the comparison process completes successfully (regardless of whether differences are found), False if an exception occurs during the comparison process.",
      "settings_required": [
        "DocumentComparator class must be available in the same module or imported",
        "RemarkableAuth module must be accessible for authentication functionality",
        "Appropriate authentication credentials/configuration required by RemarkableAuth",
        "Network access may be required if DocumentComparator fetches remote documents"
      ],
      "source_code": "def main():\n    \"\"\"Compare real vs uploaded documents\"\"\"\n    try:\n        comparator = DocumentComparator()\n        \n        print(f\"\ud83e\uddea Document Structure Comparison\")\n        \n        invoice_data, upload_data = comparator.compare_documents()\n        \n        if invoice_data and upload_data:\n            print(f\"\\n\u2705 Comparison completed!\")\n            print(f\"\ud83d\udca1 Check the differences above to understand why visibility/move behavior differs\")\n        else:\n            print(f\"\\n\u274c Comparison failed\")\n        \n        return True\n        \n    except Exception as e:\n        print(f\"\u274c Comparison failed: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/compare_documents.py",
      "tags": [
        "document-comparison",
        "entry-point",
        "main-function",
        "remarkable",
        "document-analysis",
        "error-handling",
        "console-output"
      ],
      "updated_at": "2025-12-07T01:59:48.504603",
      "usage_example": "# Assuming DocumentComparator is defined in the same module\nif __name__ == '__main__':\n    success = main()\n    if success:\n        print('Document comparison completed successfully')\n    else:\n        print('Document comparison encountered errors')"
    },
    {
      "best_practices": [
        "Ensure 'input_data.csv' exists and contains the required columns before calling this function",
        "The function uses comprehensive error handling with try-except blocks for each major operation",
        "All print statements provide progress tracking and error diagnostics",
        "The function follows early return pattern on errors to prevent cascading failures",
        "Output files are automatically named with descriptive prefixes (plot_01, table_01)",
        "Statistical significance is evaluated at the 0.05 level by default",
        "The function closes matplotlib figures after saving to prevent memory leaks",
        "Consider wrapping this function call in a try-except block for production use",
        "Verify write permissions in the working directory before execution",
        "The correlation assumes linear relationship between variables; check data distribution first"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 18:07:50",
      "decorators": [],
      "dependencies": [
        "pandas",
        "numpy",
        "matplotlib",
        "seaborn",
        "scipy"
      ],
      "description": "Performs statistical analysis to determine the correlation between antibiotic use frequency and vaccination modes (in-ovo vs non-in-ovo), generating visualizations and saving results to files.",
      "docstring": null,
      "id": 1522,
      "imports": [
        "import pandas as pd",
        "import numpy as np",
        "import matplotlib.pyplot as plt",
        "import seaborn as sns",
        "from scipy.stats import pearsonr",
        "import warnings"
      ],
      "imports_required": [
        "import pandas as pd",
        "import numpy as np",
        "import matplotlib.pyplot as plt",
        "import seaborn as sns",
        "from scipy.stats import pearsonr"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 86,
      "line_start": 17,
      "name": "main_v93",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a complete data analysis pipeline that: (1) loads antibiotic treatment data from a CSV file, (2) validates required columns exist, (3) calculates Pearson correlation between two vaccination modes, (4) creates a scatter plot visualization, (5) saves correlation metrics to a CSV file, and (6) writes statistical conclusions to a text file. It's designed for analyzing the relationship between antibiotic treatment frequencies in different vaccination contexts.",
      "return_annotation": null,
      "return_explained": "This function returns None implicitly. It performs side effects by creating three output files: 'plot_01_correlation_antibiotic_vaccination.png' (scatter plot), 'table_01_correlation_results.csv' (correlation metrics), and 'conclusions.txt' (statistical interpretation). The function may return early (None) if errors occur during data loading, validation, or processing.",
      "settings_required": [
        "Input file 'input_data.csv' must exist in the current working directory",
        "The CSV file must contain columns 'DWTreatmentId_False' and 'DWTreatmentId_True'",
        "Write permissions in the current directory for creating output files",
        "Sufficient disk space for saving PNG plot and CSV/TXT output files"
      ],
      "source_code": "def main():\n    print(\"Starting statistical analysis...\")\n    print(f\"Query: Conclude on the correlation between antibiotic use frequency and vaccination modes (in-ovo true or false). Use a single plot to illustrate this correlation.\")\n    \n    # Load data\n    try:\n        df = pd.read_csv('input_data.csv')\n        print(f\"Data loaded successfully: {df.shape}\")\n    except Exception as e:\n        print(f\"Error loading data: {e}\")\n        return\n    \n    # Data validation\n    required_columns = ['DWTreatmentId_False', 'DWTreatmentId_True']\n    for col in required_columns:\n        if col not in df.columns:\n            print(f\"Error: Missing required column '{col}' in the dataset.\")\n            return\n    \n    # Calculate correlation\n    try:\n        correlation, p_value = pearsonr(df['DWTreatmentId_False'], df['DWTreatmentId_True'])\n        print(f\"Correlation calculated: {correlation}, p-value: {p_value}\")\n    except Exception as e:\n        print(f\"Error calculating correlation: {e}\")\n        return\n    \n    # Plotting\n    try:\n        plt.figure(figsize=(10, 6))\n        sns.scatterplot(x='DWTreatmentId_False', y='DWTreatmentId_True', data=df)\n        plt.title('Correlation between Antibiotic Use Frequency and Vaccination Modes')\n        plt.xlabel('Antibiotic Use Frequency (Not In-Ovo)')\n        plt.ylabel('Antibiotic Use Frequency (In-Ovo)')\n        plt.grid(True)\n        plt.savefig('plot_01_correlation_antibiotic_vaccination.png')\n        plt.close()\n        print(\"Plot saved as 'plot_01_correlation_antibiotic_vaccination.png'\")\n    except Exception as e:\n        print(f\"Error generating plot: {e}\")\n        return\n    \n    # Save correlation result to a CSV file\n    try:\n        correlation_data = pd.DataFrame({\n            'Metric': ['Correlation', 'P-Value'],\n            'Value': [correlation, p_value]\n        })\n        
correlation_data.to_csv('table_01_correlation_results.csv', index=False)\n        print(\"Correlation results saved as 'table_01_correlation_results.csv'\")\n    except Exception as e:\n        print(f\"Error saving correlation results: {e}\")\n        return\n    \n    # Write conclusions\n    try:\n        with open('conclusions.txt', 'w') as f:\n            f.write(\"Conclusions on the correlation between antibiotic use frequency and vaccination modes:\\n\")\n            f.write(f\"Pearson correlation coefficient: {correlation:.4f}\\n\")\n            f.write(f\"P-value: {p_value:.4f}\\n\")\n            if p_value < 0.05:\n                f.write(\"The correlation is statistically significant at the 0.05 significance level.\\n\")\n            else:\n                f.write(\"The correlation is not statistically significant at the 0.05 significance level.\\n\")\n        print(\"Conclusions written to 'conclusions.txt'\")\n    except Exception as e:\n        print(f\"Error writing conclusions: {e}\")\n        return\n    \n    print(\"Analysis completed successfully!\")",
      "source_file": "/tf/active/vicechatdev/smartstat/output/b7a013ae-a461-4aca-abae-9ed243119494/analysis_6cdbc6c8/analysis.py",
      "tags": [
        "statistical-analysis",
        "correlation",
        "data-visualization",
        "pearson-correlation",
        "antibiotic-analysis",
        "vaccination",
        "csv-processing",
        "scatter-plot",
        "data-pipeline",
        "file-io",
        "healthcare-analytics"
      ],
      "updated_at": "2025-12-07T01:59:48.503816",
      "usage_example": "import pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom scipy.stats import pearsonr\n\n# Prepare sample input data\ndata = {\n    'DWTreatmentId_False': [10, 15, 20, 25, 30],\n    'DWTreatmentId_True': [12, 18, 22, 28, 35]\n}\ndf = pd.DataFrame(data)\ndf.to_csv('input_data.csv', index=False)\n\n# Run the analysis\nmain()\n\n# Output files created:\n# - plot_01_correlation_antibiotic_vaccination.png\n# - table_01_correlation_results.csv\n# - conclusions.txt"
    },
    {
      "best_practices": [
        "Ensure 'input_data.csv' exists and is properly formatted before calling this function",
        "The function expects specific column names ('Medication_Type', 'DWTreatmentId_False', 'DWTreatmentId_True') - verify data schema compatibility",
        "Check that the working directory has write permissions for output files",
        "The function filters for 'ANTIBIOTICA' medication type - ensure this value exists in your data",
        "Consider wrapping the function call in a try-except block for production use to handle unexpected errors",
        "The function uses early returns on errors - monitor console output for error messages",
        "Output files are overwritten if they already exist - backup important files before running",
        "The histogram uses 20 bins by default - this may need adjustment for different data distributions",
        "The function assumes DWTreatmentId_False and DWTreatmentId_True contain numeric values suitable for addition",
        "For large datasets, consider memory usage as the function loads the entire CSV into memory"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 18:07:33",
      "decorators": [],
      "dependencies": [
        "pandas",
        "numpy",
        "matplotlib",
        "seaborn",
        "scipy"
      ],
      "description": "Performs statistical analysis on antibiotic usage data, comparing distribution patterns between vaccinated and non-vaccinated groups, and generates visualization plots, summary tables, and written conclusions.",
      "docstring": null,
      "id": 1521,
      "imports": [
        "import pandas as pd",
        "import numpy as np",
        "import matplotlib.pyplot as plt",
        "import seaborn as sns",
        "from scipy import stats",
        "import warnings"
      ],
      "imports_required": [
        "import pandas as pd",
        "import numpy as np",
        "import matplotlib.pyplot as plt",
        "import seaborn as sns",
        "from scipy import stats",
        "import warnings"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 67,
      "line_start": 17,
      "name": "main_v92",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a complete data analysis pipeline for examining the relationship between antibiotic use and vaccination status. It loads CSV data, filters for antibiotic medications, calculates total usage metrics, creates distribution visualizations with KDE plots, generates summary statistics, and outputs interpretative conclusions. The function is designed for healthcare data analysis workflows where understanding medication patterns relative to vaccination is important.",
      "return_annotation": null,
      "return_explained": "This function returns None. It performs side effects by creating three output files: (1) 'plot_01_antibiotic_use_vs_vaccination.png' - a histogram with KDE showing antibiotic use distribution by vaccination status, (2) 'table_01_summary_antibiotic_use.csv' - descriptive statistics of antibiotic usage, and (3) 'conclusions.txt' - written interpretations of the analysis. The function prints status messages to console throughout execution and may return early (None) if errors occur during data loading or validation.",
      "settings_required": [
        "Input file 'input_data.csv' must exist in the current working directory",
        "The CSV file must contain columns: 'Medication_Type', 'DWTreatmentId_False', 'DWTreatmentId_True'",
        "Write permissions required in the current directory for output files",
        "Sufficient disk space for output files (plot PNG, CSV table, text file)"
      ],
      "source_code": "def main():\n    print(\"Starting statistical analysis...\")\n    print(f\"Query: Revisit the previous analysis and change the plot reported to become a distribution of antibiotic use versus vaccination modus.\")\n    \n    # Load data\n    try:\n        df = pd.read_csv('input_data.csv')\n        print(f\"Data loaded successfully: {df.shape}\")\n    except Exception as e:\n        print(f\"Error loading data: {e}\")\n        return\n    \n    # Validate necessary columns\n    required_columns = ['Medication_Type', 'DWTreatmentId_False', 'DWTreatmentId_True']\n    for col in required_columns:\n        if col not in df.columns:\n            print(f\"Error: Missing required column '{col}' in the data.\")\n            return\n    \n    # Filter data for antibiotics\n    antibiotics_df = df[df['Medication_Type'] == 'ANTIBIOTICA']\n    print(f\"Filtered antibiotics data: {antibiotics_df.shape}\")\n    \n    # Calculate total antibiotic use\n    antibiotics_df['Total_Antibiotic_Use'] = antibiotics_df['DWTreatmentId_False'] + antibiotics_df['DWTreatmentId_True']\n    \n    # Plot distribution of antibiotic use versus vaccination modus\n    plt.figure(figsize=(10, 6))\n    sns.histplot(data=antibiotics_df, x='Total_Antibiotic_Use', hue='DWTreatmentId_True', bins=20, kde=True)\n    plt.title('Distribution of Antibiotic Use vs. 
Vaccination Modus')\n    plt.xlabel('Total Antibiotic Use')\n    plt.ylabel('Frequency')\n    plt.legend(title='Vaccination Modus', labels=['Without Vaccination', 'With Vaccination'])\n    plt.tight_layout()\n    plt.savefig('plot_01_antibiotic_use_vs_vaccination.png')\n    print(\"Plot saved as 'plot_01_antibiotic_use_vs_vaccination.png'\")\n    \n    # Create summary table\n    summary_table = antibiotics_df[['Medication_Type', 'Total_Antibiotic_Use']].describe()\n    summary_table.to_csv('table_01_summary_antibiotic_use.csv')\n    print(\"Summary table saved as 'table_01_summary_antibiotic_use.csv'\")\n    \n    # Write conclusions\n    with open('conclusions.txt', 'w') as f:\n        f.write(\"Conclusions and Interpretations:\\n\")\n        f.write(\"1. The distribution plot shows the variation in antibiotic use with respect to vaccination modus.\\n\")\n        f.write(\"2. The summary statistics provide insights into the central tendency and dispersion of antibiotic use.\\n\")\n        f.write(\"3. Further analysis could explore the impact of different vaccination strategies on antibiotic consumption.\\n\")\n    print(\"Conclusions written to 'conclusions.txt'\")\n    \n    print(\"Analysis completed successfully!\")",
      "source_file": "/tf/active/vicechatdev/smartstat/output/b7a013ae-a461-4aca-abae-9ed243119494/analysis_70ac0517/analysis.py",
      "tags": [
        "data-analysis",
        "statistical-analysis",
        "healthcare",
        "antibiotics",
        "vaccination",
        "visualization",
        "histogram",
        "kde-plot",
        "pandas",
        "seaborn",
        "csv-processing",
        "medical-data",
        "distribution-analysis",
        "data-pipeline",
        "reporting"
      ],
      "updated_at": "2025-12-07T01:59:48.503042",
      "usage_example": "# Ensure input_data.csv exists with required columns\n# Example CSV structure:\n# Medication_Type,DWTreatmentId_False,DWTreatmentId_True\n# ANTIBIOTICA,150,200\n# ANTIBIOTICA,180,220\n# OTHER,100,120\n\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom scipy import stats\nimport warnings\n\n# Define the main function (paste the function code here)\n\n# Execute the analysis\nif __name__ == '__main__':\n    main()\n    # Output files will be created:\n    # - plot_01_antibiotic_use_vs_vaccination.png\n    # - table_01_summary_antibiotic_use.csv\n    # - conclusions.txt"
    },
    {
      "best_practices": [
        "Always run this script from the email-forwarder root directory to pass validation checks",
        "Ensure requirements.txt and src directory exist before execution",
        "The function depends on an external start_service() function which must be defined or imported",
        "Check the return value to determine if the service started successfully",
        "The function prints status messages to stdout, so redirect output if running in automated environments"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 17:34:39",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "dotenv"
      ],
      "description": "Entry point function that validates the working directory and starts an email forwarding service.",
      "docstring": "Main function.",
      "id": 1470,
      "imports": [
        "import os",
        "import sys",
        "import subprocess",
        "import time",
        "from pathlib import Path",
        "from dotenv import load_dotenv",
        "from main import main"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from dotenv import load_dotenv"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 129,
      "line_start": 116,
      "name": "main_v91",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for the Email Forwarder application. It performs a directory validation check to ensure the script is being run from the correct location (email-forwarder directory), then delegates to a start_service() function to initialize and run the email forwarding service. It provides user feedback through console output and returns a boolean indicating success or failure.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: False if the directory validation fails (requirements.txt or src directory not found in current working directory), otherwise returns the boolean result from the start_service() function call (success status of service startup).",
      "settings_required": [
        "Must be run from the email-forwarder directory containing 'requirements.txt' and 'src' folder",
        "Requires a start_service() function to be defined in the same module or imported",
        "May require environment variables loaded via dotenv (based on import, though not explicitly used in this function)"
      ],
      "source_code": "def main():\n    \"\"\"Main function.\"\"\"\n    print(\"\ud83d\udce7 Email Forwarder - Programmatic Startup\")\n    print(\"=========================================\")\n    print()\n    \n    # Check current directory\n    if not (Path('requirements.txt').exists() and Path('src').exists()):\n        print(\"\u274c This script must be run from the email-forwarder directory\")\n        print(f\"   Current directory: {Path.cwd()}\")\n        return False\n    \n    success = start_service()\n    return success",
      "source_file": "/tf/active/vicechatdev/email-forwarder/run_service.py",
      "tags": [
        "entry-point",
        "main-function",
        "email-forwarder",
        "service-startup",
        "directory-validation",
        "initialization",
        "cli"
      ],
      "updated_at": "2025-12-07T01:59:48.502424",
      "usage_example": "# Ensure you are in the email-forwarder directory\n# Directory structure should include:\n#   - requirements.txt\n#   - src/\n\nimport sys\nfrom pathlib import Path\nfrom dotenv import load_dotenv\n\n# Define or import start_service function\ndef start_service():\n    # Service startup logic here\n    return True\n\n# Call main function\nif __name__ == '__main__':\n    success = main()\n    if success:\n        print('Service started successfully')\n    else:\n        print('Service failed to start')\n        sys.exit(1)"
    },
    {
      "best_practices": [
        "This function should be called as the entry point of a script using if __name__ == '__main__': main()",
        "Ensure all helper functions (clear_browser_cache_instructions, check_static_files, check_debug_endpoint, touch_static_files, print_help) are defined before calling main()",
        "Command-line arguments are case-insensitive due to .lower() conversion",
        "The command is read from sys.argv[1]; when the script is run without any argument, the function prints the help text instead",
        "Error handling for unknown commands is built-in and will display help information"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 10:47:46",
      "decorators": [],
      "dependencies": [
        "requests"
      ],
      "description": "A command-line interface (CLI) entry point that parses command-line arguments and dispatches to various development tool functions for managing browser cache, static files, and debug endpoints.",
      "docstring": "Main development tools menu",
      "id": 482,
      "imports": [
        "import os",
        "import time",
        "import requests",
        "import sys",
        "from pathlib import Path"
      ],
      "imports_required": [
        "import sys",
        "import os",
        "import time",
        "import requests",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 102,
      "line_start": 84,
      "name": "main_v90",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for a development tools menu system. It processes command-line arguments to execute different development tasks: calling clear_browser_cache_instructions() ('clear'), checking static files and the debug endpoint ('check'), calling touch_static_files() ('touch'), or checking only the debug endpoint ('info'). If no command is provided it displays help information; an unknown command prints an error message followed by the help information.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects by calling other functions based on command-line arguments and printing output to the console.",
      "settings_required": [
        "Requires the following helper functions to be defined in the same module: clear_browser_cache_instructions(), check_static_files(), check_debug_endpoint(), touch_static_files(), print_help()",
        "Must be executed as a script with command-line arguments accessible via sys.argv"
      ],
      "source_code": "def main():\n    \"\"\"Main development tools menu\"\"\"\n    if len(sys.argv) > 1:\n        command = sys.argv[1].lower()\n        \n        if command == \"clear\":\n            clear_browser_cache_instructions()\n        elif command == \"check\":\n            check_static_files()\n            check_debug_endpoint()\n        elif command == \"touch\":\n            touch_static_files()\n        elif command == \"info\":\n            check_debug_endpoint()\n        else:\n            print(f\"\u274c Unknown command: {command}\")\n            print_help()\n    else:\n        print_help()",
      "source_file": "/tf/active/vicechatdev/vice_ai/dev_tools.py",
      "tags": [
        "cli",
        "command-line",
        "development-tools",
        "menu",
        "dispatcher",
        "entry-point",
        "static-files",
        "cache-management",
        "debug",
        "developer-utilities"
      ],
      "updated_at": "2025-12-07T01:59:48.501416",
      "usage_example": "# Run from command line:\n# python script.py clear\n# python script.py check\n# python script.py touch\n# python script.py info\n# python script.py  # Shows help\n\n# Or call directly in code:\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "Always specify the --collection argument as it is required",
        "Ensure ChromaDB server is running before executing this function",
        "Start with default similarity threshold (0.90) and adjust based on results",
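        "When --output-collection is omitted, main() saves to '<collection>_cleaned' rather than the input collection (the argument's help text says otherwise); pass --output-collection to choose a different output name",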
        "Monitor memory usage when processing large collections as all data is loaded into memory",
        "The clustering/summarization step is currently commented out; uncomment if needed",
        "Consider backing up your ChromaDB collection before running cleanup operations",
        "Review the number of documents before and after cleaning to ensure expected behavior",
        "Lower similarity thresholds (e.g., 0.80) will remove more documents but may lose unique content",
        "Higher similarity thresholds (e.g., 0.95) will be more conservative in removing documents"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only if clustering/summarization functionality is uncommented and enabled",
          "import": "from src.clustering.text_clusterer import TextClusterer",
          "optional": true
        },
        {
          "condition": "imported in source file but not used in current implementation",
          "import": "from src.cleaners.combined_cleaner import CombinedCleaner",
          "optional": true
        },
        {
          "condition": "imported in source file but not directly used in main function",
          "import": "from src.utils.hash_utils import hash_text",
          "optional": true
        },
        {
          "condition": "imported in source file but not directly used in main function",
          "import": "from src.utils.similarity_utils import calculate_similarity",
          "optional": true
        },
        {
          "condition": "imported in source file but not used in current implementation",
          "import": "import os",
          "optional": true
        }
      ],
      "created_at": "2025-12-06 10:34:15",
      "decorators": [],
      "dependencies": [
        "argparse",
        "chromadb",
        "typing"
      ],
      "description": "Command-line interface function that orchestrates a ChromaDB collection cleaning pipeline by removing duplicate and similar documents through hashing and similarity screening.",
      "docstring": null,
      "id": 438,
      "imports": [
        "import argparse",
        "import chromadb",
        "from chromadb.config import Settings",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from src.cleaners.hash_cleaner import HashCleaner",
        "from src.cleaners.similarity_cleaner import SimilarityCleaner",
        "from src.cleaners.combined_cleaner import CombinedCleaner",
        "from src.utils.hash_utils import hash_text",
        "from src.utils.similarity_utils import calculate_similarity",
        "from src.clustering.text_clusterer import TextClusterer",
        "from src.config import Config",
        "import os"
      ],
      "imports_required": [
        "import argparse",
        "import chromadb",
        "from chromadb.config import Settings",
        "from typing import List, Dict, Any",
        "from src.cleaners.hash_cleaner import HashCleaner",
        "from src.cleaners.similarity_cleaner import SimilarityCleaner",
        "from src.config import Config"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 67,
      "line_start": 18,
      "name": "main_v89",
      "parameters": [],
      "parameters_explained": {
        "none": "This function takes no direct parameters. All configuration is provided via command-line arguments parsed internally using argparse. Command-line arguments include: --collection (required, ChromaDB collection name), --host (ChromaDB host, default 'vice_chroma'), --port (ChromaDB port, default 8000), --similarity-threshold (float, default 0.90), --num-clusters (int, default 10), --skip-summarization (boolean flag), --output-collection (optional output collection name; when omitted, main() uses '<collection>_cleaned')"
      },
      "parent_class": null,
      "purpose": "This is the main entry point for a ChromaDB data cleaning utility. It parses command-line arguments, initializes cleaning components (HashCleaner and SimilarityCleaner), loads documents from a ChromaDB collection, removes duplicates and near-duplicates based on configurable thresholds, and saves the cleaned data to an output collection. The function supports optional clustering/summarization (currently commented out) and allows users to specify similarity thresholds and output collection names.",
      "return_annotation": null,
      "return_explained": "This function returns None (implicit). It performs side effects by reading from and writing to ChromaDB collections, and prints status messages to stdout indicating the number of documents loaded and saved.",
      "settings_required": [
        "ChromaDB server must be running and accessible at the specified host and port",
        "The specified input collection must exist in ChromaDB",
        "Config class must be properly implemented with attributes: chroma_collection, chroma_host, chroma_port, similarity_threshold, num_clusters, skip_summarization",
        "Functions load_data_from_chromadb() and save_data_to_chromadb() must be defined in the same module or imported",
        "HashCleaner and SimilarityCleaner classes must be properly implemented with clean() methods",
        "Sufficient permissions to read from input collection and write to output collection"
      ],
      "source_code": "def main():\n    # Parse command line arguments\n    parser = argparse.ArgumentParser(description='Clean up ChromaDB collection')\n    parser.add_argument('--collection', type=str, required=True, help='Name of the ChromaDB collection')\n    parser.add_argument('--host', type=str, default='vice_chroma', help='ChromaDB host')\n    parser.add_argument('--port', type=int, default=8000, help='ChromaDB port')\n    parser.add_argument('--similarity-threshold', type=float, default=0.90, \n                        help='Similarity threshold for detecting similar documents')\n    parser.add_argument('--num-clusters', type=int, default=10, \n                        help='Number of clusters for clustering')\n    parser.add_argument('--skip-summarization', action='store_true', \n                        help='Skip the summarization step')\n    parser.add_argument('--output-collection', type=str, default=None,\n                        help='Output collection name (if not specified, will overwrite input collection)')\n    \n    args = parser.parse_args()\n    \n    # Create config object with command line arguments\n    config = Config()\n    config.chroma_collection = args.collection\n    config.chroma_host = args.host\n    config.chroma_port = args.port\n    config.similarity_threshold = args.similarity_threshold\n    config.num_clusters = args.num_clusters\n    config.skip_summarization = args.skip_summarization\n    \n    output_collection = args.output_collection or f\"{config.chroma_collection}_cleaned\"\n    \n    # Initialize cleaners\n    hash_cleaner = HashCleaner(config)\n    similarity_cleaner = SimilarityCleaner(config)\n\n    # Load data from ChromaDB\n    data = load_data_from_chromadb(config)\n    print(f\"Loaded {len(data)} documents from ChromaDB collection '{config.chroma_collection}'\")\n\n    # Step 1: Remove identical text chunks using hashing\n    cleaned_data_hash = hash_cleaner.clean(data)\n\n    # Step 2: Remove nearly similar text 
chunks using similarity screening\n    cleaned_data_similarity = similarity_cleaner.clean(cleaned_data_hash)\n\n    # Step 3: Cluster and summarize similar text chunks\n    #text_clusterer = TextClusterer(config)\n    #clustered_data = text_clusterer.cluster(cleaned_data_similarity)\n    clustered_data = cleaned_data_similarity\n\n    # Save cleaned and enriched data back to ChromaDB\n    save_data_to_chromadb(clustered_data, config, output_collection)\n    print(f\"Saved {len(clustered_data)} documents to ChromaDB collection '{output_collection}'\")",
      "source_file": "/tf/active/vicechatdev/chromadb-cleanup/main copy.py",
      "tags": [
        "cli",
        "command-line",
        "data-cleaning",
        "deduplication",
        "chromadb",
        "vector-database",
        "similarity-detection",
        "hash-based-deduplication",
        "document-processing",
        "pipeline",
        "orchestration",
        "main-entry-point"
      ],
      "updated_at": "2025-12-07T01:59:48.500655",
      "usage_example": "# Run from command line:\n# python script.py --collection my_documents --host localhost --port 8000 --similarity-threshold 0.85 --output-collection my_documents_clean\n\n# Or call directly in Python (not recommended as it's designed for CLI):\nif __name__ == '__main__':\n    main()\n\n# Example with minimal arguments:\n# python script.py --collection my_collection\n\n# Example with all options:\n# python script.py --collection my_docs --host vice_chroma --port 8000 --similarity-threshold 0.90 --num-clusters 10 --skip-summarization --output-collection cleaned_docs"
    },
    {
      "best_practices": [
        "Ensure all required input files exist before calling this function",
        "Configure logging before calling main() to capture all log messages",
        "The function creates a 'signatures' directory in the script's directory - ensure write permissions",
        "The function logs the owner password at INFO level; restrict access to the log output and store the password in a secure location for administrative access",
        "The function expects specific file paths relative to __file__ - adjust paths if running from different locations",
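        "Exceptions raised during processing and verification are caught and logged inside main() (not re-raised), and the function returns None in every case, so callers cannot detect failure from the return value or an exception",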
        "The watermark image is optional - the function will continue without it if not found",
        "Review logged verification results (hash, PDF/A compliance, protection status) to ensure document integrity",
        "The finalize=True parameter locks the document - ensure this is desired behavior",
        "The function uses compliance_level='2b' for PDF/A-2b standard - adjust if different compliance is needed"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 10:26:49",
      "decorators": [],
      "dependencies": [
        "os",
        "logging",
        "json",
        "sys",
        "pikepdf"
      ],
      "description": "Entry point function that demonstrates document processing workflow by creating an audited, watermarked, and protected PDF/A document from a DOCX file with audit trail data.",
      "docstring": null,
      "id": 422,
      "imports": [
        "import os",
        "import logging",
        "import json",
        "import sys",
        "import pikepdf",
        "from src.document_processor import DocumentProcessor"
      ],
      "imports_required": [
        "import os",
        "import logging",
        "import json",
        "import sys",
        "import pikepdf",
        "from src.document_processor import DocumentProcessor"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 114,
      "line_start": 23,
      "name": "main_v88",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a demonstration and testing entry point for the document processing system. It sets up necessary directories, validates input files (DOCX document, JSON audit data, watermark image), processes the document through the DocumentProcessor pipeline to create a compliant PDF/A output with watermarks and signatures, and performs verification checks on the resulting document including hash verification, PDF/A compliance, and protection status.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects including creating directories, processing documents, and logging results. The function may return early (None) if required input files are not found.",
      "settings_required": [
        "Logger must be configured (assumes 'logger' variable exists in module scope)",
        "Directory structure: './signatures/' directory (created if missing)",
        "Input files: './examples/test_document_original.docx' (required)",
        "Input files: './examples/sample_audit_data.json' (required)",
        "Input files: './examples/ViceBio_Logo_dark blue.png' (optional watermark)",
        "DocumentProcessor class must be available from src.document_processor module",
        "DocumentProcessor must have process_document, hash_generator, pdfa_converter, and optionally document_protector attributes"
      ],
      "source_code": "def main():\n    # Create sample directory structure if it doesn't exist\n    signatures_dir = os.path.join(os.path.dirname(__file__), 'signatures')\n    if not os.path.exists(signatures_dir):\n        os.makedirs(signatures_dir)\n        logger.info(f\"Created signatures directory: {signatures_dir}\")\n    \n    # Sample document and audit data\n    sample_doc = os.path.join(os.path.dirname(__file__), './examples/test_document_original.docx')\n    sample_json = os.path.join(os.path.dirname(__file__), './examples/sample_audit_data.json')\n    output_pdf = os.path.join(os.path.dirname(__file__), './examples/audited_document.pdf')\n    watermark_path = os.path.join(os.path.dirname(__file__), './examples/ViceBio_Logo_dark blue.png')\n    \n    # Check if files exist\n    if not os.path.exists(sample_doc):\n        logger.error(f\"Sample document not found: {sample_doc}\")\n        return\n    \n    if not os.path.exists(sample_json):\n        logger.error(f\"Audit data JSON not found: {sample_json}\")\n        return\n    \n    if not os.path.exists(watermark_path):\n        logger.warning(f\"Watermark image not found: {watermark_path}\")\n        watermark_path = None\n    \n    # Initialize document processor\n    processor = DocumentProcessor()\n    \n    # Process document\n    try:\n        output_path = processor.process_document(\n            original_doc_path=sample_doc,\n            json_path=sample_json,\n            output_path=output_pdf,\n            watermark_image=watermark_path,\n            include_signatures=True,\n            convert_to_pdfa=True,\n            compliance_level='2b',\n            finalize=True  # Add this parameter to lock the document\n        )\n        \n        logger.info(f\"Successfully created audited document: {output_path}\")\n        \n        # Verify document hash using processor's stored hash if available\n        if hasattr(processor, '_last_document_hash'):\n            logger.info(\"Using stored 
document hash for verification\")\n            stored_hash = processor._last_document_hash\n            extracted_hash = None\n            \n            try:\n                with pikepdf.open(output_path) as pdf:\n                    if \"/DocumentHash\" in pdf.docinfo:\n                        hash_json = pdf.docinfo[\"/DocumentHash\"]\n                        hash_metadata = json.loads(str(hash_json))\n                        extracted_hash = hash_metadata.get(\"hash\")\n            except Exception as e:\n                logger.warning(f\"Could not extract hash from PDF metadata: {e}\")\n            \n            hash_verified = stored_hash == extracted_hash\n            if hash_verified:\n                logger.info(f\"Document hash verification: Passed \u2705\")\n            else:\n                logger.warning(f\"Document hash verification: Failed \u274c\")\n        else:\n            # Fall back to standard verification\n            hash_verified = processor.hash_generator.verify_hash(output_path)\n            if hash_verified:\n                logger.info(f\"Document hash verification: Passed \u2705\")\n            else:\n                logger.warning(f\"Document hash verification: Failed \u274c\")\n        \n        # Verify PDF/A compliance\n        pdfa_compliant = processor.pdfa_converter.validate_pdfa(output_path)\n        if pdfa_compliant:\n            logger.info(f\"PDF/A compliance check: Passed \u2705\")\n        else:\n            logger.warning(f\"PDF/A compliance check: Failed \u274c\")\n        \n        # Check if document is protected\n        is_protected = hasattr(processor, 'document_protector') and hasattr(processor, '_last_owner_password')\n        if is_protected:\n            logger.info(\"\ud83d\udd12 Document is protected from editing\")\n            logger.info(f\"Owner password: {getattr(processor, '_last_owner_password', 'Not available')}\")\n            logger.info(\"Keep this password in a secure location for administrative 
access\")\n        else:\n            logger.info(\"\u26a0\ufe0f Document is not protected from editing\")\n            \n        logger.info(f\"Document processing complete. Output file: {output_path}\")\n        \n    except Exception as e:\n        logger.error(f\"Error processing document: {e}\", exc_info=True)",
      "source_file": "/tf/active/vicechatdev/document_auditor/main.py",
      "tags": [
        "document-processing",
        "pdf-generation",
        "audit-trail",
        "watermarking",
        "pdf-a-compliance",
        "document-protection",
        "hash-verification",
        "entry-point",
        "demo",
        "file-validation",
        "docx-to-pdf"
      ],
      "updated_at": "2025-12-07T01:59:48.499906",
      "usage_example": "# Ensure required files exist in examples directory:\n# - examples/test_document_original.docx\n# - examples/sample_audit_data.json\n# - examples/ViceBio_Logo_dark blue.png (optional)\n\nimport os\nimport logging\nimport json\nimport sys\nimport pikepdf\nfrom src.document_processor import DocumentProcessor\n\n# Configure logger\nlogger = logging.getLogger(__name__)\nlogging.basicConfig(level=logging.INFO)\n\n# Run the main function\nif __name__ == '__main__':\n    main()\n\n# Output will be created at: ./examples/audited_document.pdf\n# The function will log verification results for hash, PDF/A compliance, and protection status"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point of the diagnostic script, typically within an if __name__ == '__main__': block",
        "Ensure that test_different_scopes() and test_tenant_admin_center_approach() functions are properly defined before calling main()",
        "The function assumes console output is available and appropriate for the execution environment",
        "Consider adding error handling around the test function calls to prevent the entire diagnostic from failing if one test encounters an error",
        "The function provides guidance but does not automatically fix issues - manual intervention by SharePoint administrators may be required"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:22:08",
      "decorators": [],
      "dependencies": [
        "requests",
        "json"
      ],
      "description": "Entry point function that runs a SharePoint permission diagnostic tool, testing different authentication scopes and providing troubleshooting guidance.",
      "docstring": "Main diagnostic function.",
      "id": 238,
      "imports": [
        "import requests",
        "import json"
      ],
      "imports_required": [
        "import requests",
        "import json"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 165,
      "line_start": 151,
      "name": "main_v87",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main orchestrator for a SharePoint permission diagnostic utility. It executes two test functions (test_different_scopes and test_tenant_admin_center_approach) to diagnose SharePoint app-only authentication issues, then displays a summary of potential solutions for enabling app-only tokens and proper app registration. It's designed to help developers troubleshoot SharePoint API authentication problems.",
      "return_annotation": null,
      "return_explained": "This function returns None (implicitly). It performs side effects by printing diagnostic information and test results to stdout.",
      "settings_required": [
        "Requires test_different_scopes() function to be defined in the same module",
        "Requires test_tenant_admin_center_approach() function to be defined in the same module",
        "May require SharePoint tenant URL and authentication credentials configured in the called test functions",
        "May require Azure AD app registration details (client ID, client secret, tenant ID) for SharePoint API access"
      ],
      "source_code": "def main():\n    \"\"\"Main diagnostic function.\"\"\"\n    print(\"SharePoint Permission Diagnostic Tool\")\n    print(\"=\" * 50)\n    \n    test_different_scopes()\n    print()\n    test_tenant_admin_center_approach()\n    \n    print(\"\\n\" + \"=\" * 50)\n    print(\"\ud83d\udccb Summary of Solutions to Try:\")\n    print(\"1. Use SharePoint Admin Center to enable app-only tokens\")\n    print(\"2. Use API Management to approve permissions\")\n    print(\"3. Use PowerShell with PnP to register the app properly\")\n    print(\"4. Contact your SharePoint admin to enable app-only authentication\")",
      "source_file": "/tf/active/vicechatdev/SPFCsync/diagnose_permissions.py",
      "tags": [
        "sharepoint",
        "diagnostic",
        "authentication",
        "troubleshooting",
        "app-only-tokens",
        "permissions",
        "azure-ad",
        "main-entry-point",
        "testing",
        "admin-tools"
      ],
      "updated_at": "2025-12-07T01:59:48.499260",
      "usage_example": "# Assuming test_different_scopes() and test_tenant_admin_center_approach() are defined\nimport requests\nimport json\n\ndef test_different_scopes():\n    print(\"Testing different authentication scopes...\")\n    # Implementation here\n\ndef test_tenant_admin_center_approach():\n    print(\"Testing tenant admin center approach...\")\n    # Implementation here\n\ndef main():\n    \"\"\"Main diagnostic function.\"\"\"\n    print(\"SharePoint Permission Diagnostic Tool\")\n    print(\"=\" * 50)\n    \n    test_different_scopes()\n    print()\n    test_tenant_admin_center_approach()\n    \n    print(\"\\n\" + \"=\" * 50)\n    print(\"\ud83d\udccb Summary of Solutions to Try:\")\n    print(\"1. Use SharePoint Admin Center to enable app-only tokens\")\n    print(\"2. Use API Management to approve permissions\")\n    print(\"3. Use PowerShell with PnP to register the app properly\")\n    print(\"4. Contact your SharePoint admin to enable app-only authentication\")\n\nif __name__ == \"__main__\":\n    main()"
    },
    {
      "best_practices": [
        "This function is designed for testing purposes only and uses a hardcoded UUID - it should not be used in production code",
        "Ensure proper authentication is configured before running this function",
        "The function provides console output for debugging - consider redirecting or capturing output in automated test environments",
        "The hardcoded UUID should be replaced with a parameterized value for reusable testing",
        "Error handling catches all exceptions broadly - consider more specific exception handling for production use",
        "Verify network connectivity and reMarkable cloud service availability before execution"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:56:49",
      "decorators": [],
      "dependencies": [
        "json",
        "time",
        "hashlib",
        "uuid",
        "base64",
        "zlib",
        "pathlib",
        "crc32c"
      ],
      "description": "A test function that attempts to move a specific document (identified by UUID) from trash to a 'gpt_in' folder on a reMarkable device using the DocumentMover class.",
      "docstring": "Test moving a document to gpt_in folder",
      "id": 2124,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "imports_required": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "from pathlib import Path",
        "import crc32c"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 546,
      "line_start": 523,
      "name": "main_v86",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a test harness to verify the document moving functionality of the DocumentMover class. It specifically tests moving a hardcoded document (UUID: 206f5df3-07c2-4341-8afd-2b7362aefa91) from trash to the 'gpt_in' folder on a reMarkable device, providing console feedback about the operation's success or failure.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the document was successfully moved to the gpt_in folder, False if the operation failed or an exception occurred during initialization.",
      "settings_required": [
        "DocumentMover class must be available in the same module or imported",
        "RemarkableAuth class must be available (from auth module)",
        "Valid reMarkable device authentication credentials configured in RemarkableAuth",
        "Network connectivity to reMarkable cloud services",
        "The document with UUID '206f5df3-07c2-4341-8afd-2b7362aefa91' must exist in the reMarkable account",
        "A 'gpt_in' folder must exist or be creatable on the reMarkable device"
      ],
      "source_code": "def main():\n    \"\"\"Test moving a document to gpt_in folder\"\"\"\n    try:\n        mover = DocumentMover()\n        \n        # Use the document we know exists\n        test_doc_uuid = \"206f5df3-07c2-4341-8afd-2b7362aefa91\"\n        \n        print(f\"\ud83e\uddea Testing Document Move to gpt_in Folder\")\n        print(f\"Target document: {test_doc_uuid}\")\n        \n        success = mover.move_document_from_trash(test_doc_uuid)\n        \n        if success:\n            print(f\"\\n\u2705 Test completed successfully!\")\n            print(f\"\ud83d\udca1 Check your reMarkable device - the document should now be visible in the gpt_in folder\")\n        else:\n            print(f\"\\n\u274c Test failed\")\n        \n        return success\n        \n    except Exception as e:\n        print(f\"\u274c Test failed to initialize: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_move_from_trash.py",
      "tags": [
        "test",
        "remarkable",
        "document-management",
        "file-operations",
        "cloud-sync",
        "integration-test",
        "device-management"
      ],
      "updated_at": "2025-12-07T01:59:48.498609",
      "usage_example": "# Ensure DocumentMover and RemarkableAuth classes are available\n# from document_mover import DocumentMover\n# from auth import RemarkableAuth\n\n# Run the test\nif __name__ == '__main__':\n    result = main()\n    if result:\n        print('Document successfully moved')\n    else:\n        print('Document move failed')"
    },
    {
      "best_practices": [
        "This function is designed for debugging purposes and outputs directly to stdout; it should not be used in production code that requires structured output",
        "The function hardcodes the gpt_in folder UUID ('99c6551f-2855-44cf-a4e4-c9c586558f42'); this should be parameterized for reusability",
        "Ensure proper authentication is established before calling this function to avoid API errors",
        "The function catches all exceptions broadly; consider more specific exception handling for production use",
        "This function depends on the FolderDebugger class which must implement methods: get_root_info(), analyze_gpt_in_folder(), find_documents_in_folder(), and check_web_app_sync_status()",
        "The function is intended as a standalone diagnostic tool and should be run in a context where console output is appropriate"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:54:01",
      "decorators": [],
      "dependencies": [
        "json",
        "auth (custom module containing RemarkableAuth)",
        "FolderDebugger (custom class, not shown in imports but instantiated in function)"
      ],
      "description": "Diagnostic function that debugs visibility issues with the 'gpt_in' folder in a reMarkable tablet's file system by analyzing folder metadata, document contents, and sync status.",
      "docstring": "Debug the gpt_in folder visibility issue",
      "id": 2116,
      "imports": [
        "import json",
        "from auth import RemarkableAuth"
      ],
      "imports_required": [
        "import json",
        "from auth import RemarkableAuth"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 251,
      "line_start": 199,
      "name": "main_v85",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function performs comprehensive debugging of the 'gpt_in' folder visibility issue on reMarkable tablets. It instantiates a FolderDebugger, retrieves root folder information, analyzes the gpt_in folder's metadata (checking for deletion status, parent folder, and other properties), finds documents within the folder, checks web app sync status, and provides a detailed diagnostic report. The function is designed to help troubleshoot why documents might not appear in the reMarkable web application despite existing in the device's file system.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the debug process completes successfully (regardless of whether issues are found), False if an exception occurs during debugging. The function primarily outputs diagnostic information to stdout rather than returning data.",
      "settings_required": [
        "FolderDebugger class must be defined and available in the same module or imported",
        "RemarkableAuth must be properly configured with valid reMarkable API credentials",
        "Network access to reMarkable cloud services for API calls",
        "Valid authentication tokens for reMarkable API access"
      ],
      "source_code": "def main():\n    \"\"\"Debug the gpt_in folder visibility issue\"\"\"\n    try:\n        debugger = FolderDebugger()\n        \n        print(f\"\ud83d\udd0d Debugging GPT_IN Folder Visibility\")\n        print(\"=\" * 50)\n        \n        # Get root info\n        root_data, root_content = debugger.get_root_info()\n        \n        # Analyze gpt_in folder\n        gpt_in_info, gpt_in_metadata = debugger.analyze_gpt_in_folder(root_content)\n        \n        if gpt_in_info and gpt_in_metadata:\n            print(f\"\\n\ud83d\udcca GPT_IN FOLDER ANALYSIS:\")\n            print(f\"   Folder Name: {gpt_in_metadata.get('visibleName', 'Unknown')}\")\n            print(f\"   Folder Type: {gpt_in_metadata.get('type', 'Unknown')}\")\n            print(f\"   Folder Parent: {gpt_in_metadata.get('parent', 'Unknown')}\")\n            print(f\"   Folder Deleted: {gpt_in_metadata.get('deleted', False)}\")\n            print(f\"   Folder Pinned: {gpt_in_metadata.get('pinned', False)}\")\n            \n            # Check if folder is deleted or has issues\n            if gpt_in_metadata.get('deleted', False):\n                print(f\"\u274c ISSUE FOUND: gpt_in folder is marked as DELETED!\")\n            elif gpt_in_metadata.get('parent') != '':\n                print(f\"\u274c ISSUE FOUND: gpt_in folder parent is '{gpt_in_metadata.get('parent')}', should be '' (root)!\")\n            else:\n                print(f\"\u2705 gpt_in folder appears healthy\")\n        \n        # Find documents in gpt_in folder\n        gpt_in_uuid = \"99c6551f-2855-44cf-a4e4-c9c586558f42\"\n        documents = debugger.find_documents_in_folder(root_content, gpt_in_uuid)\n        \n        # Check sync status\n        sync_status = debugger.check_web_app_sync_status()\n        \n        print(f\"\\n\ud83c\udfaf SUMMARY:\")\n        print(f\"   \ud83d\udcc1 gpt_in folder: {'\u2705 Found' if gpt_in_info else '\u274c Missing'}\")\n        print(f\"   \ud83d\udcc4 Documents in 
folder: {len(documents) if documents else 0}\")\n        print(f\"   \ud83c\udf10 Sync generation: {sync_status.get('generation') if sync_status else 'Unknown'}\")\n        \n        if gpt_in_info and documents:\n            print(f\"\\n\ud83d\udca1 CONCLUSION:\")\n            print(f\"   The gpt_in folder exists and contains documents.\")\n            print(f\"   If documents don't appear in the web app, this is likely a client caching issue.\")\n            print(f\"   Try refreshing the web app, clearing browser cache, or waiting for sync.\")\n        \n        return True\n        \n    except Exception as e:\n        print(f\"\u274c Debug failed: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/debug_gpt_in_folder.py",
      "tags": [
        "debugging",
        "diagnostics",
        "remarkable-tablet",
        "folder-visibility",
        "file-system",
        "sync-status",
        "metadata-analysis",
        "troubleshooting",
        "cloud-sync"
      ],
      "updated_at": "2025-12-07T01:59:48.497864",
      "usage_example": "# Ensure FolderDebugger class is defined or imported\n# Ensure RemarkableAuth is configured with valid credentials\n\nif __name__ == '__main__':\n    success = main()\n    if success:\n        print('Debug completed successfully')\n    else:\n        print('Debug failed with errors')"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point of the script, typically within an 'if __name__ == \"__main__\"' block",
        "Ensure that analyze_rm_filename_patterns() and generate_header_examples() functions are properly defined before calling main()",
        "The function provides user-friendly output with emoji indicators (\u2705, \u274c) for status feedback",
        "Error handling is broad (catches all exceptions), which is appropriate for a top-level main function but may hide specific error details",
        "The function prints next steps for the user, suggesting it's part of a development/debugging workflow",
        "Consider checking the return value when calling this function programmatically to handle failure cases"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:48:43",
      "decorators": [],
      "dependencies": [
        "json",
        "pathlib"
      ],
      "description": "Main entry point function that orchestrates an analysis of filename patterns and generates header examples, with error handling and user feedback.",
      "docstring": "Run the analysis",
      "id": 2104,
      "imports": [
        "import json",
        "from pathlib import Path"
      ],
      "imports_required": [
        "import json",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 172,
      "line_start": 157,
      "name": "main_v84",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the primary execution point for analyzing rm-filename patterns and generating corresponding header examples. It coordinates the execution of two analysis functions, provides status feedback to the user, and offers guidance on next steps for fixing upload functionality related to rm-filename handling. The function is designed to be called as the main entry point of a script that analyzes and documents proper filename pattern usage.",
      "return_annotation": null,
      "return_explained": "Returns False if an exception occurs during execution, otherwise returns None implicitly. The False return value indicates analysis failure, while no explicit return (None) indicates successful completion.",
      "settings_required": [
        "Requires analyze_rm_filename_patterns() function to be defined in the same module or imported",
        "Requires generate_header_examples() function to be defined in the same module or imported",
        "May require access to data files or directories that these analysis functions operate on"
      ],
      "source_code": "def main():\n    \"\"\"Run the analysis\"\"\"\n    try:\n        patterns = analyze_rm_filename_patterns()\n        generate_header_examples()\n        \n        print(f\"\\n\u2705 Analysis complete!\")\n        print(\"Next steps:\")\n        print(\"1. Fix upload_raw_content to ALWAYS include rm-filename\")\n        print(\"2. Add proper rm-filename patterns for root/system files\") \n        print(\"3. Follow the correct upload sequence\")\n        print(\"4. Test with the corrected headers\")\n        \n    except Exception as e:\n        print(f\"\u274c Analysis failed: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_headers.py",
      "tags": [
        "entry-point",
        "orchestration",
        "analysis",
        "filename-patterns",
        "error-handling",
        "user-feedback",
        "main-function",
        "workflow-coordination"
      ],
      "updated_at": "2025-12-07T01:59:48.497220",
      "usage_example": "if __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "This function has a hardcoded document UUID which limits its reusability - consider parameterizing the UUID for production use",
        "The function includes user-friendly console output with emoji indicators for status updates",
        "Error handling is implemented with a try-except block to catch initialization failures",
        "The function returns a boolean for easy integration into larger scripts or test suites",
        "Consider adding logging instead of print statements for production environments",
        "The DocumentRefresher class must be properly initialized with authentication before use"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:44:05",
      "decorators": [],
      "dependencies": [
        "json",
        "time",
        "hashlib",
        "uuid",
        "base64",
        "zlib",
        "pathlib",
        "crc32c"
      ],
      "description": "A standalone function that forces a refresh of document visibility for a specific hardcoded Remarkable document UUID by instantiating a DocumentRefresher and calling its force_refresh_document method.",
      "docstring": "Force refresh the document visibility",
      "id": 2093,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "imports_required": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 317,
      "line_start": 294,
      "name": "main_v83",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a test/utility script to manually trigger a document visibility refresh operation for a Remarkable cloud document. It targets a specific document (UUID: 206f5df3-07c2-4341-8afd-2b7362aefa91) and attempts to make it visible in the 'gpt_in' folder of the web application. This is useful for debugging synchronization issues or manually forcing document metadata updates.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the document refresh operation completed successfully, False if the refresh failed or an exception occurred during initialization. The return value indicates whether the document visibility was successfully updated.",
      "settings_required": [
        "DocumentRefresher class must be defined and available in the same module or imported",
        "RemarkableAuth module must be available with proper authentication configuration",
        "Valid Remarkable cloud API credentials configured in the auth module",
        "Network access to Remarkable cloud services",
        "The target document UUID (206f5df3-07c2-4341-8afd-2b7362aefa91) should exist in the Remarkable cloud account"
      ],
      "source_code": "def main():\n    \"\"\"Force refresh the document visibility\"\"\"\n    try:\n        refresher = DocumentRefresher()\n        \n        # Use the document we know exists\n        test_doc_uuid = \"206f5df3-07c2-4341-8afd-2b7362aefa91\"\n        \n        print(f\"\ud83e\uddea Force Refreshing Document Visibility\")\n        print(f\"Target document: {test_doc_uuid}\")\n        \n        success = refresher.force_refresh_document(test_doc_uuid)\n        \n        if success:\n            print(f\"\\n\u2705 Refresh completed successfully!\")\n            print(f\"\ud83d\udca1 Check the web app - the document should now be visible in gpt_in folder\")\n        else:\n            print(f\"\\n\u274c Refresh failed\")\n        \n        return success\n        \n    except Exception as e:\n        print(f\"\u274c Refresh failed to initialize: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/force_web_app_refresh.py",
      "tags": [
        "remarkable",
        "document-sync",
        "cloud-storage",
        "visibility-refresh",
        "utility",
        "test-function",
        "document-management",
        "force-update"
      ],
      "updated_at": "2025-12-07T01:59:48.496569",
      "usage_example": "# Assuming DocumentRefresher class is defined in the same file\n# and all required imports are present\n\nif __name__ == '__main__':\n    result = main()\n    if result:\n        print('Document refresh successful')\n    else:\n        print('Document refresh failed')"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point of the test script, typically within an 'if __name__ == \"__main__\"' block",
        "The function provides comprehensive error handling, so callers should check the boolean return value to determine test success",
        "Stack traces are automatically printed on failure, making debugging easier without additional logging setup",
        "Ensure all required modules (auth, upload_manager, SimplePDFUploadTest) are properly configured before calling this function",
        "Consider using the return value to set appropriate exit codes in command-line scripts",
        "The function catches all exceptions, so specific error types are not propagated to callers"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported lazily when an exception occurs to print stack trace",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:38:13",
      "decorators": [],
      "dependencies": [
        "reportlab",
        "pathlib",
        "typing"
      ],
      "description": "Entry point function that initializes and runs a PDF upload test for reMarkable devices, with comprehensive error handling and traceback reporting.",
      "docstring": "Run the simple PDF upload test",
      "id": 2078,
      "imports": [
        "import os",
        "import json",
        "import time",
        "import uuid",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import Any",
        "from auth import RemarkableAuth",
        "from upload_manager import RemarkableUploadManager",
        "from reportlab.pdfgen import canvas",
        "from reportlab.lib.pagesizes import letter",
        "import traceback",
        "import traceback",
        "import traceback"
      ],
      "imports_required": [
        "import os",
        "import json",
        "import time",
        "import uuid",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict, Any",
        "from auth import RemarkableAuth",
        "from upload_manager import RemarkableUploadManager",
        "from reportlab.pdfgen import canvas",
        "from reportlab.lib.pagesizes import letter",
        "import traceback"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 262,
      "line_start": 252,
      "name": "main_v82",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for testing PDF upload functionality to reMarkable devices. It instantiates a SimplePDFUploadTest object, executes the test, and handles any exceptions that occur during initialization or execution. The function provides detailed error reporting including stack traces to aid in debugging test failures.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value indicating test success (True) or failure (False). Returns True if the test completes successfully, False if any exception occurs during test initialization or execution.",
      "settings_required": [
        "SimplePDFUploadTest class must be defined and importable in the same module or imported",
        "RemarkableAuth module must be available with proper authentication configuration",
        "RemarkableUploadManager module must be available for handling uploads",
        "Appropriate reMarkable API credentials or authentication tokens may be required",
        "Network connectivity to reMarkable cloud services"
      ],
      "source_code": "def main():\n    \"\"\"Run the simple PDF upload test\"\"\"\n    try:\n        test = SimplePDFUploadTest()\n        success = test.run_test()\n        return success\n    except Exception as e:\n        print(f\"\u274c Test initialization failed: {e}\")\n        import traceback\n        traceback.print_exc()\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_simple_pdf_upload.py",
      "tags": [
        "testing",
        "pdf",
        "upload",
        "remarkable",
        "entry-point",
        "error-handling",
        "integration-test",
        "main-function",
        "test-runner"
      ],
      "updated_at": "2025-12-07T01:59:48.495930",
      "usage_example": "if __name__ == '__main__':\n    # Run the PDF upload test\n    success = main()\n    \n    # Exit with appropriate status code\n    import sys\n    sys.exit(0 if success else 1)\n    \n    # Alternative: Use result for further processing\n    # if success:\n    #     print('Test passed successfully')\n    # else:\n    #     print('Test failed')"
    },
    {
      "best_practices": [
        "This function modifies sys.path at runtime, which should only be done in test/development contexts, not in production code",
        "The function performs lazy imports inside the function body, which is useful for testing but may hide import errors until runtime",
        "Error handling is minimal - only checks if session is None/False, but doesn't catch exceptions from build_complete_replica()",
        "The replica directory name 'remarkable_replica' is hardcoded - consider making it configurable for different test scenarios",
        "This function is designed as a test entry point and should be called from if __name__ == '__main__': block",
        "Ensure proper cleanup of the replica directory between test runs if needed"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported inside the function to modify sys.path for local module imports",
          "import": "import sys",
          "optional": false
        },
        {
          "condition": "imported inside the function after sys.path modification to access the authentication module",
          "import": "from auth import RemarkableAuth",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:36:19",
      "decorators": [],
      "dependencies": [
        "sys",
        "pathlib",
        "os",
        "json",
        "zipfile",
        "requests",
        "logging",
        "shutil",
        "enum",
        "typing",
        "dataclasses",
        "datetime",
        "re"
      ],
      "description": "A test function that authenticates with the Remarkable cloud service and builds a complete local replica of the user's Remarkable data.",
      "docstring": "Main function for testing",
      "id": 2072,
      "imports": [
        "import os",
        "import json",
        "import zipfile",
        "import requests",
        "import logging",
        "import shutil",
        "from enum import Enum",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Set",
        "from dataclasses import dataclass",
        "from dataclasses import asdict",
        "from datetime import datetime",
        "import sys",
        "from auth import RemarkableAuth",
        "import re"
      ],
      "imports_required": [
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 640,
      "line_start": 621,
      "name": "main_v81",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a testing entry point for the Remarkable synchronization system. It performs authentication using RemarkableAuth, establishes a session, and then creates a local replica of all Remarkable cloud data using the RemarkableLocalReplica class. The function is designed to verify that the authentication and replication pipeline works correctly.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value indicating success or failure. Returns False if authentication fails, otherwise returns the success status from the replica.build_complete_replica() method (expected to be a boolean indicating whether the replica was built successfully).",
      "settings_required": [
        "RemarkableAuth module must be present in the same directory as the script",
        "RemarkableLocalReplica class must be defined in the same module or imported",
        "Valid Remarkable cloud credentials (handled by RemarkableAuth)",
        "Network connectivity to Remarkable cloud services",
        "Write permissions for creating the 'remarkable_replica' directory"
      ],
      "source_code": "def main():\n    \"\"\"Main function for testing\"\"\"\n    import sys\n    sys.path.insert(0, str(Path(__file__).parent))\n    \n    from auth import RemarkableAuth\n    \n    # Authenticate\n    auth = RemarkableAuth()\n    session = auth.authenticate()\n    \n    if not session:\n        print(\"\u274c Authentication failed\")\n        return False\n    \n    # Build replica\n    replica = RemarkableLocalReplica(session, \"remarkable_replica\")\n    success = replica.build_complete_replica()\n    \n    return success",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/local_replica.py",
      "tags": [
        "testing",
        "authentication",
        "remarkable",
        "cloud-sync",
        "replication",
        "entry-point",
        "integration-test",
        "main-function"
      ],
      "updated_at": "2025-12-07T01:59:48.495280",
      "usage_example": "# Assuming this function is in a file called remarkable_sync.py\n# and the auth module and RemarkableLocalReplica class are available\n\nif __name__ == '__main__':\n    success = main()\n    if success:\n        print('\u2705 Replica built successfully')\n    else:\n        print('\u274c Failed to build replica')\n    sys.exit(0 if success else 1)"
    },
    {
      "best_practices": [
        "This function should only be called as the main entry point of the program, typically within an 'if __name__ == \"__main__\"' block",
        "Ensure run_complete_test_suite() is properly defined before calling main()",
        "The function uses sys.exit() which terminates the entire program - do not call this from within other functions unless program termination is intended",
        "Exit code 0 indicates success, exit code 1 indicates failure - this follows Unix/Linux conventions",
        "The function provides three levels of error handling: normal test failure, user interruption (Ctrl+C), and unexpected exceptions",
        "Traceback is printed for unexpected exceptions to aid in debugging"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only when an unexpected exception occurs to print the full stack trace",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:32:34",
      "decorators": [],
      "dependencies": [
        "sys",
        "pathlib",
        "time",
        "traceback"
      ],
      "description": "Main entry point function that executes a complete test suite and handles program exit codes based on test results and exceptions.",
      "docstring": "Main entry point",
      "id": 2062,
      "imports": [
        "import sys",
        "from pathlib import Path",
        "import time",
        "from auth import RemarkableAuth",
        "from discovery import RemarkableDiscovery",
        "from local_replica import RemarkableLocalReplica",
        "import traceback",
        "import traceback"
      ],
      "imports_required": [
        "import sys",
        "import traceback",
        "from pathlib import Path",
        "import time",
        "from auth import RemarkableAuth",
        "from discovery import RemarkableDiscovery",
        "from local_replica import RemarkableLocalReplica"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 228,
      "line_start": 214,
      "name": "main_v80",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the primary entry point for running a test suite. It orchestrates the execution of run_complete_test_suite(), manages the program's exit status based on success/failure, and provides comprehensive error handling for keyboard interrupts and unexpected exceptions. It ensures clean program termination with appropriate exit codes (0 for success, 1 for failure) and user-friendly error messages.",
      "return_annotation": null,
      "return_explained": "This function does not return a value. Instead, it terminates the program using sys.exit() with exit code 0 if tests pass successfully, or exit code 1 if tests fail, user interrupts execution, or an unexpected error occurs.",
      "settings_required": [
        "The function run_complete_test_suite() must be defined and accessible in the same module or imported",
        "Custom modules 'auth', 'discovery', and 'local_replica' must be available in the Python path",
        "Any configuration required by RemarkableAuth, RemarkableDiscovery, and RemarkableLocalReplica classes"
      ],
      "source_code": "def main():\n    \"\"\"Main entry point\"\"\"\n    try:\n        success = run_complete_test_suite()\n        sys.exit(0 if success else 1)\n        \n    except KeyboardInterrupt:\n        print(\"\\n\u23f9\ufe0f Test suite interrupted by user\")\n        sys.exit(1)\n        \n    except Exception as e:\n        print(f\"\\n\ud83d\udca5 Unexpected error: {e}\")\n        import traceback\n        traceback.print_exc()\n        sys.exit(1)",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_complete_suite.py",
      "tags": [
        "entry-point",
        "test-suite",
        "error-handling",
        "exit-codes",
        "exception-handling",
        "keyboard-interrupt",
        "testing",
        "remarkable",
        "main-function"
      ],
      "updated_at": "2025-12-07T01:59:48.494643",
      "usage_example": "if __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "Always run the dry-run preview before applying changes to understand the impact",
        "Ensure proper authentication is configured before running this function",
        "Handle the boolean return value to determine if the operation succeeded",
        "Consider backing up data before running repair operations",
        "The function uses user input, so it should only be run in interactive environments, not in automated scripts",
        "Wrap calls to this function in proper error handling at the application level"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:27:19",
      "decorators": [],
      "dependencies": [
        "requests"
      ],
      "description": "Interactive command-line tool that runs a schema repair process with a dry-run preview before applying changes to the root document schema.",
      "docstring": "Run the corrected repair tool",
      "id": 2044,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Tuple",
        "from typing import Any",
        "import requests",
        "from auth import RemarkableAuth"
      ],
      "imports_required": [
        "from auth import RemarkableAuth"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 299,
      "line_start": 277,
      "name": "main_v79",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the entry point for a repair tool that fixes issues in a root document schema. It implements a two-phase approach: first showing a preview of changes in dry-run mode, then prompting the user for confirmation before applying the actual corrections. This prevents accidental data modifications and allows users to review changes before committing them.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value indicating success (True) or failure (False) of the repair operation. Returns False if the dry run fails, if the user cancels the operation, or if an exception occurs during execution. Returns the result of the actual repair operation (boolean) if the user confirms the changes.",
      "settings_required": [
        "CorrectedRootDocSchemaRepair class must be defined and available in the same module or imported",
        "Authentication credentials required by RemarkableAuth (likely API keys or tokens for Remarkable service)",
        "Network access to communicate with Remarkable API endpoints"
      ],
      "source_code": "def main():\n    \"\"\"Run the corrected repair tool\"\"\"\n    try:\n        repair = CorrectedRootDocSchemaRepair()\n        \n        # First run dry-run\n        print(\"\ud83d\udd0d Running DRY RUN to preview changes...\")\n        success = repair.preview_changes(dry_run=True)\n        \n        if success:\n            response = input(\"\\n\ud83d\ude80 Apply the corrections? (yes/no): \").strip().lower()\n            if response in ['yes', 'y']:\n                return repair.preview_changes(dry_run=False)\n            else:\n                print(\"\u274c Repair cancelled by user\")\n                return False\n        else:\n            print(\"\u274c Dry run failed\")\n            return False\n            \n    except Exception as e:\n        print(f\"\u274c Repair tool failed: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/corrected_repair.py",
      "tags": [
        "cli",
        "interactive",
        "repair-tool",
        "schema-repair",
        "dry-run",
        "user-confirmation",
        "remarkable",
        "document-schema",
        "data-migration",
        "validation"
      ],
      "updated_at": "2025-12-07T01:59:48.493997",
      "usage_example": "if __name__ == '__main__':\n    success = main()\n    if success:\n        print('Schema repair completed successfully')\n    else:\n        print('Schema repair failed or was cancelled')\n    exit(0 if success else 1)"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point of the script, typically within an 'if __name__ == \"__main__\":' block",
        "The function requires run_full_test_suite() to be defined elsewhere in the codebase",
        "Exit codes follow Unix conventions: 0 for success, non-zero (1) for any failure",
        "User interruptions (Ctrl+C) are handled gracefully with a clear message",
        "Full exception tracebacks are printed for debugging unexpected errors",
        "The function does not accept command-line arguments; if needed, they should be parsed before calling main()"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only when an unexpected exception occurs during test execution",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:24:51",
      "decorators": [],
      "dependencies": [
        "sys",
        "pathlib",
        "traceback"
      ],
      "description": "Main entry point function that executes a full test suite and handles program exit codes based on test results and exceptions.",
      "docstring": "Main entry point",
      "id": 2039,
      "imports": [
        "import sys",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "from auth import authenticate_remarkable",
        "from discovery import RemarkableDiscovery",
        "import traceback"
      ],
      "imports_required": [
        "import sys",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "from auth import authenticate_remarkable",
        "from discovery import RemarkableDiscovery",
        "import traceback"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 129,
      "line_start": 115,
      "name": "main_v78",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the primary entry point for running a test suite, likely for testing reMarkable device authentication and discovery functionality. It orchestrates test execution, handles user interruptions gracefully, catches unexpected errors with full traceback output, and returns appropriate exit codes (0 for success, 1 for failure) to the operating system.",
      "return_annotation": null,
      "return_explained": "This function does not return a value in the traditional sense. Instead, it calls sys.exit() with an integer exit code: 0 if all tests pass successfully, or 1 if tests fail, user interrupts execution (KeyboardInterrupt), or an unexpected exception occurs. The exit code can be captured by the shell or calling process.",
      "settings_required": [
        "The function depends on run_full_test_suite() being defined in the same module or imported",
        "Custom modules 'auth' and 'discovery' must be available in the Python path",
        "Any configuration required by RemarkableAuth, authenticate_remarkable, and RemarkableDiscovery components"
      ],
      "source_code": "def main():\n    \"\"\"Main entry point\"\"\"\n    try:\n        success = run_full_test_suite()\n        sys.exit(0 if success else 1)\n        \n    except KeyboardInterrupt:\n        print(\"\\n\u23f9\ufe0f Test suite interrupted by user\")\n        sys.exit(1)\n        \n    except Exception as e:\n        print(f\"\\n\ud83d\udca5 Unexpected error: {e}\")\n        import traceback\n        traceback.print_exc()\n        sys.exit(1)",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_suite.py",
      "tags": [
        "entry-point",
        "test-suite",
        "error-handling",
        "exit-codes",
        "remarkable",
        "testing",
        "exception-handling",
        "keyboard-interrupt"
      ],
      "updated_at": "2025-12-07T01:59:48.493269",
      "usage_example": "if __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "This function should be run in a test environment before making actual API calls to validate request formatting",
        "Review the generated JSON results file to understand specific differences and recommendations",
        "Ensure the test_results directory has appropriate write permissions",
        "The function returns a boolean that can be used for CI/CD pipeline integration to fail builds on critical issues",
        "Check the console output for a summary of issues before diving into the detailed JSON results",
        "The function handles exceptions gracefully and returns False on failure, making it suitable for automated testing workflows"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:23:33",
      "decorators": [],
      "dependencies": [
        "json",
        "time",
        "pathlib",
        "typing",
        "uuid",
        "hashlib",
        "base64",
        "binascii"
      ],
      "description": "Executes a dry run comparison analysis of PDF upload requests between a simulated implementation and a real application, without making actual API calls.",
      "docstring": "Run dry run comparison analysis",
      "id": 2034,
      "imports": [
        "import json",
        "import time",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "import uuid",
        "import hashlib",
        "import base64",
        "import binascii",
        "from auth import RemarkableAuth"
      ],
      "imports_required": [
        "import json",
        "import time",
        "from pathlib import Path",
        "from typing import Dict, Any, List",
        "import uuid",
        "import hashlib",
        "import base64",
        "import binascii",
        "from auth import RemarkableAuth"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 478,
      "line_start": 429,
      "name": "main_v77",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function orchestrates a comprehensive dry run testing workflow to validate PDF upload request formatting. It simulates PDF upload requests, compares them against expected real application behavior, identifies differences and critical issues, generates fix recommendations, and saves detailed results to a JSON file. The function is designed for debugging and validation purposes to ensure request compatibility before making actual API calls.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if no critical issues were found during the comparison (len(differences['critical_issues']) == 0), False if critical issues exist or if an exception occurred during execution.",
      "settings_required": [
        "DryRunUploadComparison class must be defined and available in the same module or imported",
        "Write permissions for creating 'test_results' directory in the parent directory of the script",
        "The DryRunUploadComparison class must implement methods: simulate_pdf_upload(), compare_with_real_app(), and generate_fix_recommendations()"
      ],
      "source_code": "def main():\n    \"\"\"Run dry run comparison analysis\"\"\"\n    try:\n        print(\"\ud83e\uddea DRY RUN UPLOAD COMPARISON\")\n        print(\"=\" * 50)\n        print(\"\ud83d\udeab NO API CALLS - ANALYSIS ONLY\")\n        \n        # Initialize comparison tool\n        comparator = DryRunUploadComparison()\n        \n        # Simulate our PDF upload\n        our_requests = comparator.simulate_pdf_upload(\"TestDocument_DryRun\")\n        \n        # Compare with real app\n        differences = comparator.compare_with_real_app(our_requests)\n        \n        # Generate recommendations\n        recommendations = comparator.generate_fix_recommendations(differences)\n        \n        # Save results\n        results = {\n            'timestamp': time.time(),\n            'our_requests': our_requests,\n            'differences': differences,\n            'recommendations': recommendations\n        }\n        \n        results_file = Path(__file__).parent / \"test_results\" / f\"dry_run_comparison_{int(time.time())}.json\"\n        results_file.parent.mkdir(exist_ok=True)\n        \n        with open(results_file, 'w') as f:\n            json.dump(results, f, indent=2, default=str)\n        \n        print(f\"\\n\ud83d\udcbe Dry run results saved to: {results_file}\")\n        \n        # Summary\n        print(f\"\\n\ud83d\udccb SUMMARY:\")\n        print(f\"   Header differences: {len(differences['header_differences'])}\")\n        print(f\"   Critical issues: {len(differences['critical_issues'])}\")\n        print(f\"   Recommendations: {len(recommendations)}\")\n        \n        print(f\"\\n\ud83d\udd27 RECOMMENDATIONS:\")\n        for i, rec in enumerate(recommendations, 1):\n            print(f\"   {i}. {rec}\")\n        \n        return len(differences['critical_issues']) == 0\n        \n    except Exception as e:\n        print(f\"\u274c Dry run comparison failed: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/dry_run_comparison.py",
      "tags": [
        "testing",
        "dry-run",
        "comparison",
        "validation",
        "pdf-upload",
        "debugging",
        "analysis",
        "request-simulation",
        "remarkable",
        "api-testing"
      ],
      "updated_at": "2025-12-07T01:59:48.492519",
      "usage_example": "if __name__ == '__main__':\n    # Run the dry run comparison\n    success = main()\n    \n    if success:\n        print('\u2705 All checks passed - no critical issues found')\n        exit(0)\n    else:\n        print('\u274c Critical issues detected - review recommendations')\n        exit(1)"
    },
    {
      "best_practices": [
        "This function assumes three test functions (test_imports, test_basic_functionality, test_placeholder_parsing) are defined in the same scope and return boolean values",
        "The function uses print statements for output, making it suitable for CLI usage but not for programmatic testing frameworks",
        "Consider refactoring to return a boolean or raise exceptions for better integration with automated testing frameworks",
        "The function does not handle exceptions from test functions, so any unhandled errors in tests will propagate",
        "All test functions must return True for success and False for failure for proper result tracking",
        "This is designed as a standalone validation script, typically run before enabling hybrid mode features"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:16:29",
      "decorators": [],
      "dependencies": [
        "matplotlib",
        "numpy",
        "networkx"
      ],
      "description": "A test orchestration function that runs a suite of validation tests for hybrid mode functionality, checking imports, basic functionality, and placeholder parsing.",
      "docstring": "Run all tests",
      "id": 2020,
      "imports": [
        "import matplotlib.pyplot as plt",
        "import numpy as np",
        "import networkx as nx",
        "from graphics_generator import GraphicsGenerator",
        "from graphics_generator import GraphicSpec",
        "from graphics_generator import GraphicType",
        "from hybrid_response_handler import HybridResponseHandler",
        "from hybrid_pdf_generator import HybridPDFGenerator",
        "from graphics_generator import GraphicsGenerator",
        "from hybrid_response_handler import HybridResponseHandler",
        "from hybrid_pdf_generator import HybridPDFGenerator",
        "from hybrid_response_handler import HybridResponseHandler"
      ],
      "imports_required": [
        "import matplotlib.pyplot as plt",
        "import numpy as np",
        "import networkx as nx",
        "from graphics_generator import GraphicsGenerator",
        "from graphics_generator import GraphicSpec",
        "from graphics_generator import GraphicType",
        "from hybrid_response_handler import HybridResponseHandler",
        "from hybrid_pdf_generator import HybridPDFGenerator"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 146,
      "line_start": 119,
      "name": "main_v76",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for validating a hybrid mode system that combines graphics generation, PDF generation, and response handling. It executes multiple test functions sequentially, tracks their success/failure status, and provides user-friendly feedback with instructions for enabling hybrid mode or installing dependencies based on test results.",
      "return_annotation": null,
      "return_explained": "This function does not explicitly return a value (implicitly returns None). It communicates test results through console output and prints success/failure messages along with usage instructions.",
      "settings_required": [
        "requirements-hybrid.txt file must be present for dependency installation",
        "test_imports(), test_basic_functionality(), and test_placeholder_parsing() functions must be defined in the same module",
        "graphics_generator module with GraphicsGenerator, GraphicSpec, and GraphicType classes",
        "hybrid_response_handler module with HybridResponseHandler class",
        "hybrid_pdf_generator module with HybridPDFGenerator class",
        "main.py script must exist with --file and --enable-hybrid-mode command-line arguments support"
      ],
      "source_code": "def main():\n    \"\"\"Run all tests\"\"\"\n    print(\"\ud83d\ude80 Hybrid Mode Validation Test\")\n    print(\"=\" * 50)\n    \n    success = True\n    \n    # Test imports\n    if not test_imports():\n        success = False\n    \n    # Test basic functionality\n    if not test_basic_functionality():\n        success = False\n    \n    # Test placeholder parsing\n    if not test_placeholder_parsing():\n        success = False\n    \n    print(\"\\n\" + \"=\" * 50)\n    if success:\n        print(\"\u2705 All tests passed! Hybrid mode is ready to use.\")\n        print(\"\\nTo enable hybrid mode:\")\n        print(\"   python main.py --file input.pdf --enable-hybrid-mode\")\n    else:\n        print(\"\u274c Some tests failed. Please check dependencies and installation.\")\n        print(\"\\nTo install dependencies:\")\n        print(\"   pip install -r requirements-hybrid.txt\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/test_hybrid_mode.py",
      "tags": [
        "testing",
        "validation",
        "test-suite",
        "hybrid-mode",
        "orchestration",
        "integration-testing",
        "graphics",
        "pdf-generation",
        "diagnostics"
      ],
      "updated_at": "2025-12-07T01:59:48.491819",
      "usage_example": "if __name__ == '__main__':\n    main()\n\n# Expected output:\n# \ud83d\ude80 Hybrid Mode Validation Test\n# ==================================================\n# [Test results from test_imports()]\n# [Test results from test_basic_functionality()]\n# [Test results from test_placeholder_parsing()]\n# ==================================================\n# \u2705 All tests passed! Hybrid mode is ready to use.\n# \n# To enable hybrid mode:\n#    python main.py --file input.pdf --enable-hybrid-mode"
    },
    {
      "best_practices": [
        "This function should be called using asyncio.run(main()) when executed as a script",
        "Ensure all test functions (test_compact_formatter, test_session_manager, demo_improvement_comparison) are defined before calling this function",
        "The function assumes synchronous test functions; if tests need to be async, they should be awaited",
        "Test folder './test' should exist before running the application with --watch-folder option",
        "Consider adding error handling to catch and report test failures individually rather than stopping on first failure",
        "The function currently runs tests sequentially; consider if any tests could benefit from parallel execution"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:04:15",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "pathlib"
      ],
      "description": "Asynchronous test runner function that executes a suite of tests for the E-Ink LLM Assistant application, including tests for compact formatting, session management, and improvement comparisons.",
      "docstring": "Run all tests",
      "id": 1993,
      "imports": [
        "import asyncio",
        "from pathlib import Path",
        "from compact_formatter import CompactResponseFormatter",
        "from session_manager import SessionManager"
      ],
      "imports_required": [
        "import asyncio",
        "from pathlib import Path",
        "from compact_formatter import CompactResponseFormatter",
        "from session_manager import SessionManager"
      ],
      "is_async": 1,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 209,
      "line_start": 195,
      "name": "main_v75",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for running comprehensive tests of the E-Ink LLM Assistant application. It orchestrates the execution of multiple test functions to validate the compact formatter, session manager, and demonstrate improvements. After successful test completion, it provides usage instructions for running the actual application with various command-line options.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects by printing test results and usage instructions to stdout.",
      "settings_required": [
        "Requires test folder structure at './test' for watch-folder functionality",
        "Requires example.pdf file for file-based testing examples",
        "Requires compact_formatter.py module with CompactResponseFormatter class",
        "Requires session_manager.py module with SessionManager class",
        "Requires test functions: test_compact_formatter(), test_session_manager(), demo_improvement_comparison() to be defined in the same module or imported"
      ],
      "source_code": "async def main():\n    \"\"\"Run all tests\"\"\"\n    print(\"\ud83e\uddea E-INK LLM ASSISTANT - IMPROVEMENT TESTS\")\n    print(\"=\" * 70)\n    \n    # Run tests\n    test_compact_formatter()\n    test_session_manager()\n    demo_improvement_comparison()\n    \n    print(\"\\n\ud83c\udf89 All tests completed successfully!\")\n    print(\"\\nReady to run the improved E-Ink LLM Assistant:\")\n    print(\"  python main.py --watch-folder ./test\")\n    print(\"  python main.py --list-conversations\") \n    print(\"  python main.py --file example.pdf --verbose-mode\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/test_improvements.py",
      "tags": [
        "async",
        "testing",
        "test-runner",
        "e-ink",
        "llm-assistant",
        "integration-test",
        "test-suite",
        "main-entry-point"
      ],
      "updated_at": "2025-12-07T01:59:48.491151",
      "usage_example": "import asyncio\n\nasync def main():\n    \"\"\"Run all tests\"\"\"\n    print(\"\ud83e\uddea E-INK LLM ASSISTANT - IMPROVEMENT TESTS\")\n    print(\"=\" * 70)\n    \n    test_compact_formatter()\n    test_session_manager()\n    demo_improvement_comparison()\n    \n    print(\"\\n\ud83c\udf89 All tests completed successfully!\")\n    print(\"\\nReady to run the improved E-Ink LLM Assistant:\")\n    print(\"  python main.py --watch-folder ./test\")\n    print(\"  python main.py --list-conversations\") \n    print(\"  python main.py --file example.pdf --verbose-mode\")\n\nif __name__ == \"__main__\":\n    asyncio.run(main())"
    },
    {
      "best_practices": [
        "This function should only be called as the main entry point of the script, typically within an 'if __name__ == \"__main__\"' block",
        "Ensure the logger is properly configured before calling this function to capture all test output",
        "The test_setup() and test_single_vendor() functions must be implemented in the same module",
        "Run this test function before executing the full vendor_enrichment.py script to validate configuration",
        "The function exits gracefully on setup failure without raising exceptions, making it suitable for command-line usage",
        "Command-line arguments allow flexible testing of different vendors and collections without code modification"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 16:24:30",
      "decorators": [],
      "dependencies": [
        "argparse",
        "logging",
        "os",
        "sys",
        "re",
        "hybrid_rag_engine"
      ],
      "description": "Entry point function that orchestrates vendor enrichment testing by parsing command-line arguments, running setup validation, and executing a single vendor test against a ChromaDB collection.",
      "docstring": "Main test function",
      "id": 1257,
      "imports": [
        "import os",
        "import sys",
        "from hybrid_rag_engine import OneCo_hybrid_RAG",
        "import logging",
        "import argparse",
        "import re"
      ],
      "imports_required": [
        "import argparse",
        "import logging",
        "import os",
        "import sys",
        "import re",
        "from hybrid_rag_engine import OneCo_hybrid_RAG"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 163,
      "line_start": 137,
      "name": "main_v74",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main test harness for vendor enrichment functionality. It validates the testing environment setup, allows users to specify a vendor name and ChromaDB collection via command-line arguments, executes a test enrichment for a single vendor, and provides clear success/failure feedback with next steps. It's designed to be run before executing the full vendor enrichment pipeline to ensure everything is configured correctly.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects including logging output to console/file and executing test functions. The success or failure is communicated through log messages rather than return values.",
      "settings_required": [
        "A 'logger' object must be configured and available in the module scope before calling this function",
        "The 'test_setup()' function must be defined in the same module",
        "The 'test_single_vendor()' function must be defined in the same module and accept vendor name and collection name as parameters",
        "ChromaDB collection '00_company_governance' (or specified collection) must exist and be accessible",
        "hybrid_rag_engine module must be properly installed and configured",
        "Any environment variables or configuration required by hybrid_rag_engine (e.g., API keys, database connections)"
      ],
      "source_code": "def main():\n    \"\"\"Main test function\"\"\"\n    import argparse\n    \n    parser = argparse.ArgumentParser(description='Test vendor enrichment')\n    parser.add_argument('--vendor', type=str, \n                       default='Merck',\n                       help='Vendor name to test')\n    parser.add_argument('--collection', type=str,\n                       default='00_company_governance',\n                       help='ChromaDB collection to search')\n    \n    args = parser.parse_args()\n    \n    if not test_setup():\n        logger.error(\"Setup failed\")\n        return\n    \n    logger.info(\"\\n\u2713 Setup complete\")\n    \n    success = test_single_vendor(args.vendor, args.collection)\n    \n    if success:\n        logger.info(\"\\n\u2705 Test completed successfully!\")\n        logger.info(\"You can now run the full enrichment with: python vendor_enrichment.py\")\n    else:\n        logger.error(\"\\n\u274c Test failed - check logs above\")",
      "source_file": "/tf/active/vicechatdev/find_email/test_enrichment.py",
      "tags": [
        "testing",
        "vendor-enrichment",
        "command-line",
        "argparse",
        "chromadb",
        "rag",
        "validation",
        "entry-point",
        "main-function",
        "test-harness"
      ],
      "updated_at": "2025-12-07T01:59:48.489942",
      "usage_example": "# Run from command line with default arguments (tests 'Merck' vendor):\n# python script_name.py\n\n# Run with custom vendor:\n# python script_name.py --vendor \"Pfizer\"\n\n# Run with custom vendor and collection:\n# python script_name.py --vendor \"Johnson & Johnson\" --collection \"01_vendor_data\"\n\n# In code (if calling programmatically):\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "Ensure OPENAI_API_KEY is set in environment variables before running",
        "Index documents before executing this function to avoid errors",
        "The full_reading_example() is commented out by default due to performance considerations - uncomment only when needed",
        "This function is designed to be called as the main entry point of an example/demo script",
        "Error messages provide clear guidance on setup requirements if execution fails",
        "Each example function should be independently executable and handle its own errors gracefully"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:29:05",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "document_indexer",
        "rag_engine",
        "config"
      ],
      "description": "Orchestrates and executes a series of example demonstrations for the DocChat system, including document indexing, RAG queries, and conversation modes.",
      "docstring": "Run all examples",
      "id": 261,
      "imports": [
        "from pathlib import Path",
        "from document_indexer import DocumentIndexer",
        "from rag_engine import DocChatRAG",
        "import config"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from document_indexer import DocumentIndexer",
        "from rag_engine import DocChatRAG",
        "import config"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 198,
      "line_start": 167,
      "name": "main_v73",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a comprehensive demonstration runner for the DocChat application. It sequentially executes multiple example functions to showcase different features: document indexing, basic RAG (Retrieval-Augmented Generation), extensive mode querying, full reading mode (commented out by default), and conversation with history. It includes error handling and provides user feedback about setup requirements if failures occur.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It produces side effects by printing output to the console and executing example functions that demonstrate the DocChat system's capabilities.",
      "settings_required": [
        "OPENAI_API_KEY environment variable must be set (typically in .env file)",
        "Documents must be indexed before running examples",
        "All project dependencies must be installed",
        "config.py file must be present with appropriate configuration",
        "The following functions must be defined in the same module or imported: index_documents_example(), basic_rag_example(), extensive_mode_example(), conversation_example()"
      ],
      "source_code": "def main():\n    \"\"\"Run all examples\"\"\"\n    print(\"=\" * 80)\n    print(\"DocChat - Example Usage\")\n    print(\"=\" * 80)\n    \n    try:\n        # 1. Index documents\n        index_documents_example()\n        \n        # 2. Basic RAG\n        basic_rag_example()\n        \n        # 3. Extensive mode\n        extensive_mode_example()\n        \n        # 4. Full reading mode (commented out by default as it's slow)\n        # full_reading_example()\n        \n        # 5. Conversation with history\n        conversation_example()\n        \n        print(\"\\n\" + \"=\" * 80)\n        print(\"Examples completed!\")\n        print(\"=\" * 80)\n        \n    except Exception as e:\n        print(f\"\\n\u274c Error: {e}\")\n        print(\"\\nMake sure:\")\n        print(\"  1. You have set OPENAI_API_KEY in .env\")\n        print(\"  2. You have indexed some documents\")\n        print(\"  3. Dependencies are installed\")",
      "source_file": "/tf/active/vicechatdev/docchat/example_usage.py",
      "tags": [
        "demo",
        "examples",
        "orchestration",
        "RAG",
        "document-chat",
        "testing",
        "showcase",
        "runner",
        "main-entry-point"
      ],
      "updated_at": "2025-12-07T01:59:48.489223",
      "usage_example": "if __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "This function depends on test_graph_api_access() and provide_admin_instructions() being defined elsewhere in the codebase",
        "The function is designed for interactive CLI use with formatted console output",
        "Should be called as the main entry point of the diagnostic script",
        "Does not handle exceptions - ensure dependent functions have proper error handling",
        "Uses emoji characters which may not display correctly in all terminal environments",
        "Consider redirecting output to a file if running in environments without emoji support"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:20:06",
      "decorators": [],
      "dependencies": [
        "requests",
        "json"
      ],
      "description": "Diagnostic function that tests SharePoint tenant configuration by checking Microsoft Graph API access and provides recommendations based on the results.",
      "docstring": "Main diagnostic function.",
      "id": 233,
      "imports": [
        "import requests",
        "import json"
      ],
      "imports_required": [
        "import requests",
        "import json"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 245,
      "line_start": 219,
      "name": "main_v72",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for a diagnostic tool that verifies whether Microsoft Graph API or SharePoint REST API can be accessed with app-only authentication. It tests Graph API connectivity, displays results with visual indicators (emojis), provides admin instructions, and recommends next steps based on whether Graph API access is successful. This is useful for troubleshooting SharePoint sync application authentication issues at the tenant level.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects by printing diagnostic information, test results, and recommendations to stdout.",
      "settings_required": [
        "Requires test_graph_api_access() function to be defined in the same module or imported",
        "Requires provide_admin_instructions() function to be defined in the same module or imported",
        "May require Microsoft Graph API credentials (client ID, client secret, tenant ID) configured for test_graph_api_access() to work",
        "May require environment variables or configuration file for authentication (depends on test_graph_api_access() implementation)"
      ],
      "source_code": "def main():\n    \"\"\"Main diagnostic function.\"\"\"\n    print(\"SharePoint Tenant Configuration Checker\")\n    print(\"=\" * 50)\n    \n    # Test if Graph API approach works\n    graph_works = test_graph_api_access()\n    \n    if graph_works:\n        print(\"\\n\ud83c\udf89 SUCCESS: Microsoft Graph API works!\")\n        print(\"This means we can use Graph API instead of SharePoint REST API.\")\n        print(\"I can modify the sync app to use Graph API as a workaround.\")\n    else:\n        print(\"\\n\u274c Microsoft Graph API also has issues.\")\n        print(\"This confirms it's a tenant-level app-only authentication problem.\")\n    \n    provide_admin_instructions()\n    \n    print(\"\\n\" + \"=\" * 60)\n    print(\"\ud83d\udca1 **RECOMMENDATION**\")\n    print(\"=\" * 60)\n    if graph_works:\n        print(\"Since Graph API works, I can create a Graph-based version\")\n        print(\"of the sync application that bypasses SharePoint REST API issues.\")\n    else:\n        print(\"You need SharePoint admin to enable app-only authentication\")\n        print(\"at the tenant level before the sync application will work.\")",
      "source_file": "/tf/active/vicechatdev/SPFCsync/check_tenant_config.py",
      "tags": [
        "diagnostic",
        "sharepoint",
        "microsoft-graph",
        "authentication",
        "tenant-configuration",
        "troubleshooting",
        "app-only-auth",
        "cli",
        "main-entry-point"
      ],
      "updated_at": "2025-12-07T01:59:48.488485",
      "usage_example": "# Assuming test_graph_api_access() and provide_admin_instructions() are defined\n# Example standalone execution:\nif __name__ == '__main__':\n    main()\n\n# Or call directly:\nmain()\n\n# Expected output:\n# SharePoint Tenant Configuration Checker\n# ==================================================\n# [Test results from test_graph_api_access()]\n# \ud83c\udf89 SUCCESS: Microsoft Graph API works!\n# [Additional instructions and recommendations]"
    },
    {
      "best_practices": [
        "This function depends on three external functions: load_config(), test_azure_token(), and test_sharepoint_token() which must be defined in the same module",
        "Use the return value as a system exit code for proper CLI integration",
        "Ensure Azure AD application has proper permissions (Sites.Read.All) before running",
        "The function prints sensitive information (partial client ID), ensure output is not logged in production",
        "Configuration should be stored securely, preferably using environment variables or encrypted config files",
        "The function assumes exactly 2 tests; if adding more tests, update the total_tests variable accordingly"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:18:00",
      "decorators": [],
      "dependencies": [
        "requests"
      ],
      "description": "Orchestrates a comprehensive SharePoint connection diagnostic tool that validates Azure AD authentication and SharePoint access by running multiple tests and reporting results.",
      "docstring": "Run all diagnostics.",
      "id": 227,
      "imports": [
        "import requests",
        "import json",
        "import base64",
        "from urllib.parse import quote",
        "import os",
        "import sys"
      ],
      "imports_required": [
        "import requests",
        "import json",
        "import base64",
        "from urllib.parse import quote",
        "import os",
        "import sys"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 227,
      "line_start": 189,
      "name": "main_v71",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the entry point for a diagnostic tool that verifies SharePoint connectivity. It loads configuration, displays connection parameters, executes authentication tests (Azure token and SharePoint token), and provides detailed feedback on test results with troubleshooting guidance. Returns 0 on success or 1 on failure, making it suitable for use as a CLI tool exit code.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 if all diagnostic tests pass successfully, or 1 if any tests fail or configuration cannot be loaded. This follows standard Unix exit code conventions where 0 indicates success.",
      "settings_required": [
        "SHAREPOINT_SITE_URL - SharePoint site URL to connect to",
        "AZURE_CLIENT_ID - Azure AD application client ID",
        "AZURE_CLIENT_SECRET - Azure AD application client secret",
        "Configuration must be loadable via load_config() function",
        "Azure AD app must have Sites.Read.All permissions with admin consent granted"
      ],
      "source_code": "def main():\n    \"\"\"Run all diagnostics.\"\"\"\n    print(\"SharePoint Connection Diagnostic Tool\")\n    print(\"=\" * 50)\n    \n    config = load_config()\n    if not config:\n        print(\"\u274c Could not load configuration\")\n        return 1\n    \n    print(f\"SharePoint Site: {config.get('SHAREPOINT_SITE_URL', 'Not set')}\")\n    print(f\"Client ID: {config.get('AZURE_CLIENT_ID', 'Not set')[:8]}...\")\n    print(f\"Client Secret: {'Set' if config.get('AZURE_CLIENT_SECRET') else 'Not set'}\")\n    print()\n    \n    # Run tests\n    tests_passed = 0\n    total_tests = 2\n    \n    if test_azure_token():\n        tests_passed += 1\n    \n    if test_sharepoint_token():\n        tests_passed += 1\n    \n    print(\"\\n\" + \"=\" * 50)\n    print(f\"Diagnostic Results: {tests_passed}/{total_tests} tests passed\")\n    \n    if tests_passed == total_tests:\n        print(\"\ud83c\udf89 All diagnostics passed! SharePoint connection should work.\")\n        return 0\n    else:\n        print(\"\u274c Some diagnostics failed. Please check the guidance above.\")\n        print(\"\\n\ud83d\udccb Common Solutions:\")\n        print(\"1. Verify Azure AD app permissions (Sites.Read.All)\")\n        print(\"2. Ensure admin consent is granted\")\n        print(\"3. Check client ID and secret are correct\")\n        print(\"4. Verify SharePoint site URL is accessible\")\n        return 1",
      "source_file": "/tf/active/vicechatdev/SPFCsync/diagnose_sharepoint.py",
      "tags": [
        "diagnostics",
        "sharepoint",
        "azure-ad",
        "authentication",
        "testing",
        "cli-tool",
        "connection-validation",
        "oauth",
        "troubleshooting",
        "entry-point"
      ],
      "updated_at": "2025-12-07T01:59:48.487745",
      "usage_example": "# Assuming load_config(), test_azure_token(), and test_sharepoint_token() are defined\n# and environment variables or config file are set up\n\nif __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)\n\n# Or simply:\n# python diagnostic_tool.py\n# The function will print diagnostic results and return appropriate exit code"
    },
    {
      "best_practices": [
        "This function should only be called as the script's entry point, typically within an 'if __name__ == \"__main__\"' block",
        "Passwords passed via command-line arguments may be visible in process lists; consider using environment variables or secure input methods for production use",
        "Ensure the test_acl_functions function is properly defined before calling main()",
        "The --path argument should point to a location where the user has appropriate permissions to create and modify ACLs",
        "Handle keyboard interrupts (Ctrl+C) gracefully if adding to production code",
        "Consider adding logging configuration before calling test_acl_functions for better debugging"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 08:36:46",
      "decorators": [],
      "dependencies": [
        "argparse",
        "FC_api"
      ],
      "description": "Entry point function for a FileCloud ACL management test script that parses command-line arguments and initiates ACL testing.",
      "docstring": "Main function for the test script.",
      "id": 113,
      "imports": [
        "import os",
        "import sys",
        "import argparse",
        "import json",
        "import logging",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List",
        "from typing import Optional",
        "from FC_api import FileCloudAPI"
      ],
      "imports_required": [
        "import argparse",
        "from FC_api import FileCloudAPI"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 137,
      "line_start": 127,
      "name": "main_v70",
      "parameters": [],
      "parameters_explained": {
        "none": "This function takes no parameters. All inputs are collected via command-line arguments using argparse."
      },
      "parent_class": null,
      "purpose": "This function serves as the main entry point for a test script that validates FileCloud Access Control List (ACL) management functionality. It sets up an argument parser to collect server connection details and test parameters from the command line, then invokes the test_acl_functions with the provided credentials and path. This is designed to be called when the script is executed directly, providing a CLI interface for testing FileCloud ACL operations.",
      "return_annotation": null,
      "return_explained": "Returns None (implicitly). The function does not return any value; it executes the test suite and exits. Any results or errors are handled by the test_acl_functions that it calls.",
      "settings_required": [
        "FileCloud server URL must be accessible",
        "Valid FileCloud username and password credentials",
        "The test_acl_functions function must be defined in the same module or imported",
        "Network connectivity to the FileCloud server",
        "Appropriate permissions on the FileCloud account to manage ACLs"
      ],
      "source_code": "def main():\n    \"\"\"Main function for the test script.\"\"\"\n    parser = argparse.ArgumentParser(description='Test FileCloud ACL management functions')\n    parser.add_argument('--server', '-s', required=True, help='FileCloud server URL')\n    parser.add_argument('--username', '-u', required=True, help='Username for authentication')\n    parser.add_argument('--password', '-p', required=True, help='Password for authentication')\n    parser.add_argument('--path', default='/test_acl', help='Path to use for ACL testing (default: /test_acl)')\n    \n    args = parser.parse_args()\n    \n    test_acl_functions(args.server, args.username, args.password, args.path)",
      "source_file": "/tf/active/vicechatdev/test_acl_functions.py",
      "tags": [
        "cli",
        "command-line",
        "testing",
        "filecloud",
        "acl",
        "access-control",
        "authentication",
        "entry-point",
        "argparse",
        "test-script"
      ],
      "updated_at": "2025-12-07T01:59:48.487098",
      "usage_example": "# Save the script as test_filecloud_acl.py\n# Run from command line:\n# python test_filecloud_acl.py --server https://filecloud.example.com --username admin --password secret123 --path /test_acl\n\n# Or with short flags:\n# python test_filecloud_acl.py -s https://filecloud.example.com -u admin -p secret123\n\n# Using default path:\n# python test_filecloud_acl.py -s https://filecloud.example.com -u admin -p secret123\n\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "Always provide required --username and --password arguments when running as script",
        "The function ensures cleanup is called even if processing fails by using try-finally block",
        "Exits with status code 1 if connection to FileCloud fails",
        "Credentials are passed via command-line arguments - consider using environment variables or secure credential storage for production use",
        "The FileCloudEmailProcessor class must be defined in the same module or imported before calling main()",
        "Ensure FC_api module and FileCloudAPI class are available in the Python path"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 08:45:14",
      "decorators": [],
      "dependencies": [
        "argparse",
        "sys",
        "extract_msg",
        "os",
        "mimetypes",
        "logging",
        "email",
        "traceback",
        "tempfile",
        "base64",
        "shutil",
        "subprocess",
        "pathlib",
        "datetime",
        "FC_api",
        "html",
        "re",
        "reportlab",
        "time",
        "PIL",
        "fitz",
        "PyPDF2"
      ],
      "description": "Entry point function that parses command-line arguments and orchestrates the FileCloud email processing workflow to find, download, and convert .msg files.",
      "docstring": "Main function to run as script",
      "id": 133,
      "imports": [
        "import extract_msg",
        "import os",
        "import mimetypes",
        "import logging",
        "import email",
        "from email.message import EmailMessage",
        "from email.utils import formatdate",
        "from email.utils import formataddr",
        "from email.headerregistry import Address",
        "import email.charset",
        "import traceback",
        "import tempfile",
        "import sys",
        "import base64",
        "import shutil",
        "import subprocess",
        "from pathlib import Path",
        "from datetime import datetime",
        "import argparse",
        "from FC_api import FileCloudAPI",
        "import html",
        "import base64",
        "import re",
        "import re",
        "import re",
        "import html",
        "from reportlab.lib.pagesizes import letter",
        "from reportlab.platypus import SimpleDocTemplate",
        "from reportlab.platypus import Paragraph",
        "from reportlab.platypus import Spacer",
        "from reportlab.lib.styles import getSampleStyleSheet",
        "from reportlab.lib.styles import ParagraphStyle",
        "from reportlab.lib.units import inch",
        "from reportlab.lib import colors",
        "import re",
        "import html",
        "import time",
        "from reportlab.lib.pagesizes import letter",
        "from reportlab.platypus import SimpleDocTemplate",
        "from reportlab.platypus import Image as RLImage",
        "from reportlab.platypus import Paragraph",
        "from reportlab.lib.styles import getSampleStyleSheet",
        "from reportlab.lib.units import inch",
        "from PIL import Image",
        "import fitz",
        "from reportlab.lib.pagesizes import letter",
        "from reportlab.platypus import SimpleDocTemplate",
        "from reportlab.platypus import Paragraph",
        "from reportlab.platypus import Spacer",
        "from reportlab.lib.styles import getSampleStyleSheet",
        "import html",
        "import base64",
        "from PyPDF2 import PdfMerger"
      ],
      "imports_required": [
        "import argparse",
        "import sys",
        "from FC_api import FileCloudAPI"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 1535,
      "line_start": 1518,
      "name": "main_v69",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main script entry point for a FileCloud email processor application. It sets up argument parsing for server connection details, authenticates with FileCloud, and initiates the processing of all .msg files found in a specified path. It handles connection lifecycle including cleanup on exit.",
      "return_annotation": null,
      "return_explained": "No explicit return value. The function exits with sys.exit(1) if connection fails, otherwise completes normally after processing.",
      "settings_required": [
        "FileCloud server URL (default: https://filecloud.vicebio.com)",
        "FileCloud username (required command-line argument)",
        "FileCloud password (required command-line argument)",
        "FileCloudEmailProcessor class must be defined in the same module",
        "FC_api module must be available with FileCloudAPI class"
      ],
      "source_code": "def main():\n    \"\"\"Main function to run as script\"\"\"\n    parser = argparse.ArgumentParser(description=\"FileCloud Email Processor - Find, download, and convert .msg files\")\n    parser.add_argument(\"--server\", default=\"https://filecloud.vicebio.com\", help=\"FileCloud server URL\")\n    parser.add_argument(\"--username\", required=True, help=\"FileCloud username\")\n    parser.add_argument(\"--password\", required=True, help=\"FileCloud password\")\n    parser.add_argument(\"--path\", default=\"/\", help=\"Start path in FileCloud to search for .msg files\")\n    \n    args = parser.parse_args()\n    \n    processor = FileCloudEmailProcessor(args.server, args.username, args.password)\n    try:\n        if processor.connect():\n            processor.process_all_msg_files(args.path)\n        else:\n            sys.exit(1)\n    finally:\n        processor.cleanup()",
      "source_file": "/tf/active/vicechatdev/msg_to_eml.py",
      "tags": [
        "cli",
        "command-line",
        "entry-point",
        "filecloud",
        "email-processing",
        "msg-files",
        "file-conversion",
        "authentication",
        "script-runner",
        "argument-parsing"
      ],
      "updated_at": "2025-12-07T01:59:48.486374",
      "usage_example": "# Run from command line:\n# python script.py --username myuser --password mypass --path /emails\n\n# Or call directly in code:\nif __name__ == '__main__':\n    main()\n\n# Command-line arguments:\n# --server: FileCloud server URL (optional, defaults to https://filecloud.vicebio.com)\n# --username: FileCloud username (required)\n# --password: FileCloud password (required)\n# --path: Start path to search for .msg files (optional, defaults to /)"
    },
    {
      "best_practices": [
        "Always set OPENAI_API_KEY environment variable or use --api-key argument before running",
        "Use --verbose flag for debugging and detailed error messages",
        "For production use, configure cloud services via JSON config files rather than command-line arguments",
        "Install appropriate dependencies based on mode: requirements-remarkable.txt for reMarkable, msal/requests for OneDrive",
        "Use --no-existing flag when starting watcher to avoid processing old files",
        "Enable compact mode (default) for optimal e-ink display rendering",
        "Use conversation IDs to maintain context across multiple document exchanges",
        "Set reasonable --max-pages limit to avoid processing extremely large PDFs",
        "For mixed mode, ensure both OneDrive and reMarkable Cloud are properly authenticated",
        "Use --list-conversations to track active sessions before starting new ones",
        "Handle KeyboardInterrupt gracefully - the application is designed for long-running operation",
        "Check for REMARKABLE_AVAILABLE, ONEDRIVE_AVAILABLE, and MIXED_AVAILABLE flags before using respective modes",
        "Use --generate-timeline to create visual summaries of conversation history",
        "Enable hybrid mode (default) for rich responses with both text and graphics"
      ],
      "class_interface": {},
      "complexity": "complex",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only when --generate-timeline argument is used",
          "import": "from conversation_timeline import ConversationTimelineGenerator",
          "optional": true
        },
        {
          "condition": "only when using reMarkable Cloud mode (--mode remarkable/both/mixed or --remarkable-document-id)",
          "import": "from remarkable_processor import RemarkableEInkProcessor",
          "optional": true
        },
        {
          "condition": "only when processing single reMarkable document (--remarkable-document-id)",
          "import": "from remarkable_processor import process_single_remarkable_file",
          "optional": true
        },
        {
          "condition": "only when using OneDrive mode (--mode onedrive/both/mixed)",
          "import": "from onedrive_client import OneDriveClient",
          "optional": true
        },
        {
          "condition": "only when using OneDrive mode (--mode onedrive/both/mixed)",
          "import": "from onedrive_client import OneDriveProcessor",
          "optional": true
        },
        {
          "condition": "only when using mixed cloud mode (--mode mixed)",
          "import": "from mixed_cloud_processor import MixedCloudProcessor",
          "optional": true
        },
        {
          "condition": "only when using mixed cloud mode (--mode mixed)",
          "import": "from mixed_cloud_processor import create_mixed_processor",
          "optional": true
        },
        {
          "condition": "only when using mixed cloud mode (--mode mixed)",
          "import": "from mixed_cloud_processor import create_remarkable_session",
          "optional": true
        },
        {
          "condition": "only when verbose mode is enabled (--verbose)",
          "import": "import traceback",
          "optional": true
        }
      ],
      "created_at": "2025-12-07 00:00:30",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "argparse",
        "sys",
        "os",
        "json",
        "pathlib",
        "dotenv",
        "processor",
        "session_manager",
        "remarkable_processor",
        "onedrive_client",
        "mixed_cloud_processor",
        "conversation_timeline",
        "traceback",
        "msal",
        "requests"
      ],
      "description": "Async entry point for an E-Ink LLM Assistant that processes handwritten/drawn content using AI vision models, supporting local files, reMarkable Cloud, and OneDrive integration.",
      "docstring": null,
      "id": 1981,
      "imports": [
        "import asyncio",
        "import argparse",
        "import sys",
        "import os",
        "import json",
        "from pathlib import Path",
        "from dotenv import load_dotenv",
        "from processor import EInkLLMProcessor",
        "from processor import process_single_file",
        "from session_manager import SessionManager",
        "from remarkable_processor import RemarkableEInkProcessor",
        "from remarkable_processor import process_single_remarkable_file",
        "from onedrive_client import OneDriveClient",
        "from onedrive_client import OneDriveProcessor",
        "from mixed_cloud_processor import MixedCloudProcessor",
        "from mixed_cloud_processor import create_mixed_processor",
        "from conversation_timeline import ConversationTimelineGenerator",
        "import traceback",
        "from mixed_cloud_processor import create_remarkable_session"
      ],
      "imports_required": [
        "import asyncio",
        "import argparse",
        "import sys",
        "import os",
        "import json",
        "from pathlib import Path",
        "from dotenv import load_dotenv",
        "from processor import EInkLLMProcessor",
        "from processor import process_single_file",
        "from session_manager import SessionManager"
      ],
      "is_async": 1,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 643,
      "line_start": 146,
      "name": "main_v68",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This is the main CLI application entry point that orchestrates an AI-powered document processing system designed for e-ink devices. It supports multiple modes: single file processing, file watching, reMarkable Cloud integration, OneDrive integration, and mixed cloud modes. The system processes handwritten notes, drawings, and PDFs using OpenAI's GPT-4 Vision API, maintains conversation history, generates responses optimized for e-ink displays, and can sync with cloud storage services. It includes features like conversation management, timeline generation, multi-page PDF processing, annotation detection, and hybrid text/graphics generation.",
      "return_annotation": null,
      "return_explained": "This function does not return a value. It runs until interrupted by the user (KeyboardInterrupt) or exits with sys.exit() on errors. Side effects include processing files, generating PDFs, updating databases, and syncing with cloud services.",
      "settings_required": [
        "OPENAI_API_KEY environment variable (or --api-key argument) for GPT-4 Vision API access",
        "eink_sessions.db SQLite database file for conversation tracking (auto-created)",
        "eink_llm.log file for activity logging (auto-created)",
        "Optional: remarkable_config.json for reMarkable Cloud settings (--remarkable-config)",
        "Optional: onedrive_config.json for OneDrive settings (--onedrive-config)",
        "Optional: Azure App Registration client ID for OneDrive access (--onedrive-client-id)",
        "Optional: reMarkable one-time authentication code (--remarkable-one-time-code)",
        "Watch folder directory (default: ./watch) for local file monitoring",
        "requirements-remarkable.txt dependencies for reMarkable Cloud integration",
        "requirements-mixed.txt dependencies for mixed cloud mode",
        "msal and requests packages for OneDrive integration"
      ],
      "source_code": "async def main():\n    parser = argparse.ArgumentParser(\n        description=\"E-Ink LLM Assistant - Process handwritten/drawn content with AI\",\n        formatter_class=argparse.RawDescriptionHelpFormatter,\n        epilog=\"\"\"\nExamples:\n  # Start file watcher (default mode)\n  python main.py --watch-folder ./documents\n\n  # Process a single file\n  python main.py --file drawing.pdf\n\n  # Start watcher with custom API key\n  python main.py --api-key sk-... --watch-folder ./input\n\n  # Continue existing conversation\n  python main.py --conversation-id conv_20250731_143022_a8f9c2d1 --file new_question.pdf\n\n  # Use verbose formatting instead of compact\n  python main.py --verbose-mode --file document.pdf\n\n  # List active conversations\n  python main.py --list-conversations\n\nEnvironment Variables:\n  OPENAI_API_KEY    OpenAI API key for GPT-4 Vision models\n\nSupported File Types:\n  PDF, JPG, JPEG, PNG, GIF, BMP, TIFF, WEBP\n\nOutput:\n  - Response PDFs: RESPONSE_[conv_id]_ex[num]_[filename].pdf\n  - Error reports: ERROR_[conv_id]_ex[num]_[filename].pdf\n  - Activity logs: eink_llm.log\n  - Session database: eink_sessions.db\n        \"\"\"\n    )\n    \n    # Mode selection\n    mode_group = parser.add_mutually_exclusive_group()\n    mode_group.add_argument(\n        '--file', '-f',\n        type=str,\n        help='Process a single file instead of watching a folder'\n    )\n    mode_group.add_argument(\n        '--watch-folder', '-w',\n        type=str,\n        help='Folder to watch for new files (default: ./watch)'\n    )\n    mode_group.add_argument(\n        '--remarkable-document-id',\n        type=str,\n        help='Process a single document from reMarkable Cloud by ID'\n    )\n    \n    # Operation mode\n    parser.add_argument(\n        '--mode',\n        choices=['local', 'remarkable', 'onedrive', 'both', 'mixed'],\n        default='local',\n        help='Processing mode: local file watching, reMarkable Cloud, 
OneDrive, both, or mixed (mixed = monitors both OneDrive and reMarkable for input, outputs to OneDrive) (default: local)'\n    )\n    \n    # Configuration options\n    parser.add_argument(\n        '--api-key',\n        type=str,\n        help='OpenAI API key (can also use OPENAI_API_KEY environment variable)'\n    )\n    parser.add_argument(\n        '--no-existing',\n        action='store_true',\n        help='Skip processing existing files when starting watcher'\n    )\n    parser.add_argument(\n        '--verbose', '-v',\n        action='store_true',\n        help='Enable verbose output'\n    )\n    parser.add_argument(\n        '--conversation-id',\n        type=str,\n        help='Continue existing conversation by ID (default: create new)'\n    )\n    parser.add_argument(\n        '--compact-mode',\n        action='store_true',\n        default=True,\n        help='Use compact response formatting for e-ink optimization (default: enabled)'\n    )\n    parser.add_argument(\n        '--verbose-mode',\n        action='store_true',\n        help='Use verbose response formatting (disables compact mode)'\n    )\n    parser.add_argument(\n        '--no-auto-detect',\n        action='store_true',\n        help='Disable automatic session detection from PDF metadata/content'\n    )\n    parser.add_argument(\n        '--no-multi-page',\n        action='store_true',\n        help='Disable multi-page PDF processing (process only first page)'\n    )\n    parser.add_argument(\n        '--max-pages',\n        type=int,\n        default=50,\n        help='Maximum pages to process in multi-page PDFs (default: 50)'\n    )\n    parser.add_argument(\n        '--no-editing-workflow',\n        action='store_true',\n        help='Disable annotation detection and text editing workflow'\n    )\n    parser.add_argument(\n        '--enable-hybrid-mode',\n        action='store_true',\n        default=True,\n        help='Enable hybrid mode with text and graphics generation (default: 
enabled)'\n    )\n    parser.add_argument(\n        '--no-hybrid-mode',\n        action='store_true',\n        help='Disable hybrid mode, use text-only responses'\n    )\n    parser.add_argument(\n        '--list-conversations',\n        action='store_true',\n        help='List active conversations and exit'\n    )\n    parser.add_argument(\n        '--generate-timeline',\n        type=str,\n        help='Generate conversation timeline PDF for specified conversation ID'\n    )\n    \n    # reMarkable Cloud specific options\n    remarkable_group = parser.add_argument_group('reMarkable Cloud Options')\n    remarkable_group.add_argument(\n        '--remarkable-config',\n        type=str,\n        help='Path to JSON config file for reMarkable Cloud settings'\n    )\n    remarkable_group.add_argument(\n        '--remarkable-watch-folder',\n        type=str,\n        default='/E-Ink LLM Input',\n        help='Folder path in reMarkable Cloud to watch for input files (default: /E-Ink LLM Input)'\n    )\n    remarkable_group.add_argument(\n        '--remarkable-output-folder',\n        type=str,\n        default='/E-Ink LLM Output',\n        help='Folder path in reMarkable Cloud to upload responses (default: /E-Ink LLM Output)'\n    )\n    remarkable_group.add_argument(\n        '--remarkable-one-time-code',\n        type=str,\n        help='One-time code from reMarkable account for initial authentication'\n    )\n    remarkable_group.add_argument(\n        '--remarkable-poll-interval',\n        type=int,\n        default=60,\n        help='Seconds between checks for new files in reMarkable Cloud (default: 60)'\n    )\n    \n    # OneDrive specific options\n    onedrive_group = parser.add_argument_group('OneDrive Options')\n    onedrive_group.add_argument(\n        '--onedrive-config',\n        type=str,\n        help='Path to JSON config file for OneDrive settings'\n    )\n    onedrive_group.add_argument(\n        '--onedrive-watch-folder',\n        type=str,\n        
default='/E-Ink LLM Input',\n        help='Folder path in OneDrive to watch for input files (default: /E-Ink LLM Input)'\n    )\n    onedrive_group.add_argument(\n        '--onedrive-output-folder',\n        type=str,\n        default='/E-Ink LLM Output',\n        help='Folder path in OneDrive to upload responses (default: /E-Ink LLM Output)'\n    )\n    onedrive_group.add_argument(\n        '--onedrive-poll-interval',\n        type=int,\n        default=60,\n        help='Seconds between checks for new files in OneDrive (default: 60)'\n    )\n    onedrive_group.add_argument(\n        '--onedrive-client-id',\n        type=str,\n        help='Azure App Registration client ID for OneDrive access'\n    )\n    \n    args = parser.parse_args()\n    \n    # Handle timeline generation\n    if args.generate_timeline:\n        from conversation_timeline import ConversationTimelineGenerator\n        \n        session_manager = SessionManager()\n        timeline_generator = ConversationTimelineGenerator()\n        \n        # Check if conversation exists\n        conversation = session_manager.get_conversation(args.generate_timeline)\n        if not conversation:\n            print(f\"\u274c Error: Conversation '{args.generate_timeline}' not found.\")\n            sys.exit(1)\n        \n        print(f\"\ud83d\udcca Generating timeline for conversation: {args.generate_timeline}\")\n        timeline_path = await timeline_generator.generate_timeline_pdf(\n            conversation_id=args.generate_timeline,\n            session_manager=session_manager\n        )\n        \n        if timeline_path:\n            print(f\"\u2705 Timeline generated successfully: {timeline_path}\")\n        else:\n            print(\"\u274c Error: Failed to generate timeline PDF\")\n            sys.exit(1)\n        \n        sys.exit(0)\n    \n    # Handle conversation listing\n    if args.list_conversations:\n        session_manager = SessionManager()\n        conversations = 
session_manager.list_active_conversations()\n        \n        if conversations:\n            print(\"\ud83d\uddc2\ufe0f  Active Conversations:\")\n            print(\"=\" * 70)\n            for conv in conversations:\n                print(f\"\ud83c\udd94 {conv['conversation_id']}\")\n                print(f\"   \ud83d\udcc5 Created: {conv['created_at']}\")\n                print(f\"   \ud83d\udd50 Last activity: {conv['last_activity']}\")\n                print(f\"   \ud83d\udcac Exchanges: {conv['total_exchanges']}\")\n                if conv['user_id']:\n                    print(f\"   \ud83d\udc64 User: {conv['user_id']}\")\n                print()\n        else:\n            print(\"\ud83d\udcdd No active conversations found.\")\n        \n        sys.exit(0)\n    \n    # Determine compact mode setting\n    compact_mode = args.compact_mode and not args.verbose_mode\n    \n    # Determine hybrid mode setting\n    enable_hybrid_mode = args.enable_hybrid_mode and not args.no_hybrid_mode\n    \n    # Check if remarkable mode is requested but not available\n    if (args.mode in ['remarkable', 'both'] or args.remarkable_document_id) and not REMARKABLE_AVAILABLE:\n        print(\"\u274c Error: reMarkable Cloud integration not available!\")\n        print(\"   Install with: pip install -r requirements-remarkable.txt\")\n        print(\"   Or use local mode: python main.py --mode local\")\n        sys.exit(1)\n    \n    # Check if OneDrive mode is requested but not available  \n    if args.mode in ['onedrive', 'both', 'mixed'] and not ONEDRIVE_AVAILABLE:\n        print(\"\u274c Error: OneDrive integration not available!\")\n        print(\"   Install with: pip install msal requests\")\n        print(\"   Or use local mode: python main.py --mode local\")\n        sys.exit(1)\n    \n    # Check if mixed mode is requested but not available\n    if args.mode == 'mixed' and not MIXED_AVAILABLE:\n        print(\"\u274c Error: Mixed cloud integration not available!\")\n      
  print(\"   Install dependencies with: pip install -r requirements-mixed.txt\")\n        print(\"   Or use setup script: ./setup_mixed_mode.sh\")\n        print(\"   Or use local mode: python main.py --mode local\")\n        sys.exit(1)\n    \n    # Check if mixed mode is requested but reMarkable not available\n    if args.mode == 'mixed' and not REMARKABLE_AVAILABLE:\n        print(\"\u274c Error: Mixed mode requires reMarkable Cloud integration!\")\n        print(\"   Install with: pip install -r requirements-remarkable.txt\")\n        print(\"   Or use onedrive mode: python main.py --mode onedrive\")\n        sys.exit(1)\n    \n    # Setup environment\n    setup_environment()\n    \n    # Validate API key\n    api_key = validate_api_key(args.api_key)\n    \n    # Load reMarkable configuration\n    remarkable_config = load_remarkable_config(args.remarkable_config)\n    \n    # Load OneDrive configuration\n    onedrive_config = load_onedrive_config(args.onedrive_config)\n    \n    # Override config with command line arguments\n    if args.mode in ['remarkable', 'both', 'mixed'] or args.remarkable_document_id:\n        remarkable_config.update({\n            'enabled': True,\n            'watch_folder_path': args.remarkable_watch_folder,\n            'output_folder_path': args.remarkable_output_folder,\n            'poll_interval': args.remarkable_poll_interval,\n        })\n        \n        # For mixed mode, we only watch gpt_out folder in reMarkable, not the regular input folder\n        if args.mode == 'mixed':\n            remarkable_config['watch_folder_path'] = '/gpt_out'  # Force gpt_out folder for mixed mode\n        \n        if args.remarkable_one_time_code:\n            remarkable_config['one_time_code'] = args.remarkable_one_time_code\n    \n    # Override OneDrive config with command line arguments\n    if args.mode in ['onedrive', 'both', 'mixed']:\n        onedrive_config.update({\n            'enabled': True,\n            'watch_folder_path': 
args.onedrive_watch_folder,\n            'output_folder_path': args.onedrive_output_folder,\n            'poll_interval': args.onedrive_poll_interval,\n        })\n        \n        # For mixed mode, also include reMarkable input folder configuration\n        if args.mode == 'mixed':\n            onedrive_config['remarkable_input_folder'] = args.remarkable_watch_folder\n            onedrive_config['remarkable_poll_interval'] = args.remarkable_poll_interval\n        \n        if args.onedrive_client_id:\n            onedrive_config['client_id'] = args.onedrive_client_id\n    \n    # Print banner\n    print(\"=\" * 70)\n    print(\"\ud83d\udd8b\ufe0f  E-INK LLM ASSISTANT\")\n    print(\"    AI-Powered Handwriting & Drawing Analysis\")\n    if args.mode == 'mixed':\n        print(\"    with Mixed Cloud Integration (OneDrive + reMarkable Input/Output)\")\n    elif remarkable_config.get('enabled'):\n        print(\"    with reMarkable Cloud Integration\")\n    elif onedrive_config.get('enabled'):\n        print(\"    with OneDrive Integration\")\n    print(\"=\" * 70)\n    \n    try:\n        if args.file:\n            # Single file processing mode\n            file_path = Path(args.file)\n            if not file_path.exists():\n                print(f\"\u274c Error: File not found: {file_path}\")\n                sys.exit(1)\n            \n            print(f\"\ud83d\udcc4 Single file mode: {file_path.name}\")\n            result = await process_single_file(\n                str(file_path),\n                api_key,\n                conversation_id=args.conversation_id,\n                compact_mode=compact_mode,\n                auto_detect_session=not args.no_auto_detect,\n                enable_multi_page=not args.no_multi_page,\n                max_pages=args.max_pages,\n                enable_editing_workflow=not args.no_editing_workflow,\n                enable_hybrid_mode=enable_hybrid_mode\n            )\n            \n            if result:\n                
print(f\"\u2705 Processing complete!\")\n                print(f\"\ud83d\udcc4 Response saved: {Path(result).name}\")\n            else:\n                print(f\"\u274c Processing failed\")\n                sys.exit(1)\n        \n        elif args.remarkable_document_id:\n            # Single reMarkable document processing mode\n            print(f\"\ud83c\udf10 Single reMarkable document mode: {args.remarkable_document_id}\")\n            result = await process_single_remarkable_file(\n                args.remarkable_document_id, \n                api_key, \n                remarkable_config\n            )\n            \n            if result:\n                print(f\"\u2705 Processing complete!\")\n                print(f\"\ud83d\udcc4 Response saved: {Path(result).name}\")\n            else:\n                print(f\"\u274c Processing failed\")\n                sys.exit(1)\n        \n        else:\n            # File watcher mode (default)\n            if args.mode == 'mixed':\n                # Mixed mode: OneDrive + reMarkable gpt_out watching\n                if not onedrive_config.get('client_id'):\n                    print(\"\u274c Error: OneDrive client_id required for mixed mode\")\n                    print(\"   Set via --onedrive-client-id or in config file\")\n                    sys.exit(1)\n                \n                # Setup reMarkable session for mixed mode\n                from mixed_cloud_processor import create_remarkable_session\n                \n                print(\"\ud83d\udd10 Authenticating with reMarkable Cloud...\")\n                try:\n                    remarkable_session = create_remarkable_session(remarkable_config)\n                    print(\"\u2705 reMarkable authentication successful\")\n                except Exception as e:\n                    print(f\"\u274c Error: Failed to authenticate with reMarkable Cloud: {e}\")\n                    sys.exit(1)\n                \n                # Create and start mixed 
processor\n                mixed_processor = create_mixed_processor(\n                    onedrive_config, \n                    remarkable_session, \n                    api_key\n                )\n                await mixed_processor.start_watching()\n                \n            elif args.mode == 'onedrive':\n                # OneDrive only mode\n                if not onedrive_config.get('client_id'):\n                    print(\"\u274c Error: OneDrive client_id required for OneDrive mode\")\n                    print(\"   Set via --onedrive-client-id or in config file\")\n                    sys.exit(1)\n                \n                processor = OneDriveProcessor(onedrive_config, api_key)\n                await processor.start_watching()\n                \n            elif args.mode == 'both':\n                # Both reMarkable and OneDrive (run concurrently)\n                print(\"\ud83d\udd04 Starting both reMarkable and OneDrive watchers...\")\n                \n                tasks = []\n                \n                # Start reMarkable watcher if configured\n                if remarkable_config.get('enabled'):\n                    remarkable_processor = RemarkableEInkProcessor(\n                        api_key=api_key,\n                        watch_folder=args.watch_folder,\n                        remarkable_config=remarkable_config\n                    )\n                    tasks.append(remarkable_processor.start_watching(process_existing=not args.no_existing, mode='remarkable'))\n                \n                # Start OneDrive watcher if configured\n                if onedrive_config.get('enabled'):\n                    if not onedrive_config.get('client_id'):\n                        print(\"\u274c Error: OneDrive client_id required for both mode\")\n                        print(\"   Set via --onedrive-client-id or in config file\")\n                        sys.exit(1)\n                    \n                    onedrive_processor = 
OneDriveProcessor(onedrive_config, api_key)\n                    tasks.append(onedrive_processor.start_watching())\n                \n                if not tasks:\n                    print(\"\u274c Error: No valid configurations for both mode\")\n                    sys.exit(1)\n                \n                # Run both watchers concurrently\n                await asyncio.gather(*tasks)\n                \n            elif remarkable_config.get('enabled'):\n                # Use enhanced processor with reMarkable support\n                processor = RemarkableEInkProcessor(\n                    api_key=api_key, \n                    watch_folder=args.watch_folder,\n                    remarkable_config=remarkable_config\n                )\n            else:\n                # Use original processor for local-only mode\n                watch_folder = args.watch_folder or \"./watch\"\n                processor = EInkLLMProcessor(\n                    api_key=api_key, \n                    watch_folder=watch_folder,\n                    conversation_id=args.conversation_id,\n                    compact_mode=compact_mode,\n                    auto_detect_session=not args.no_auto_detect,\n                    enable_multi_page=not args.no_multi_page,\n                    max_pages=args.max_pages,\n                    enable_editing_workflow=not args.no_editing_workflow,\n                    enable_hybrid_mode=enable_hybrid_mode\n                )\n            \n            # For non-mixed/non-onedrive/non-both modes, start the processor\n            if args.mode not in ['onedrive', 'both', 'mixed']:\n                process_existing = not args.no_existing\n                \n                if hasattr(processor, 'start_watching') and len(processor.start_watching.__code__.co_varnames) > 2:\n                    # Enhanced processor with mode support\n                    await processor.start_watching(process_existing=process_existing, mode=args.mode)\n                
else:\n                    # Original processor\n                    await processor.start_watching(process_existing=process_existing)\n    \n    except KeyboardInterrupt:\n        print(f\"\\n\ud83d\udc4b Goodbye!\")\n    except Exception as e:\n        print(f\"\\n\u274c Unexpected error: {e}\")\n        if args.verbose:\n            import traceback\n            traceback.print_exc()\n        sys.exit(1)",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/main.py",
      "tags": [
        "async",
        "cli",
        "entry-point",
        "file-processing",
        "ai-vision",
        "gpt-4",
        "openai",
        "e-ink",
        "handwriting-recognition",
        "pdf-processing",
        "cloud-sync",
        "remarkable",
        "onedrive",
        "conversation-management",
        "file-watcher",
        "document-processing",
        "argparse",
        "multi-mode",
        "session-management",
        "timeline-generation"
      ],
      "updated_at": "2025-12-07T01:59:48.485387",
      "usage_example": "import asyncio\nimport sys\n\n# Example 1: Process a single file\nsys.argv = ['main.py', '--file', 'drawing.pdf', '--api-key', 'sk-...']\nawait main()\n\n# Example 2: Start file watcher in local mode\nsys.argv = ['main.py', '--watch-folder', './documents']\nawait main()\n\n# Example 3: Use reMarkable Cloud mode\nsys.argv = ['main.py', '--mode', 'remarkable', '--remarkable-one-time-code', 'abc123']\nawait main()\n\n# Example 4: Continue existing conversation\nsys.argv = ['main.py', '--file', 'question.pdf', '--conversation-id', 'conv_20250731_143022_a8f9c2d1']\nawait main()\n\n# Example 5: List active conversations\nsys.argv = ['main.py', '--list-conversations']\nawait main()\n\n# Example 6: Generate conversation timeline\nsys.argv = ['main.py', '--generate-timeline', 'conv_20250731_143022_a8f9c2d1']\nawait main()\n\n# Example 7: Mixed cloud mode (OneDrive + reMarkable)\nsys.argv = ['main.py', '--mode', 'mixed', '--onedrive-client-id', 'azure-client-id']\nawait main()\n\n# Run with: python -c \"import asyncio; from main import main; asyncio.run(main())\""
    },
    {
      "best_practices": [
        "Ensure all required configuration constants (TENANT_ID, CLIENT_ID, etc.) are defined before calling this function",
        "The EmailSearchApp class must be properly implemented with all required methods",
        "Verify that the Azure AD application has appropriate Microsoft Graph API permissions (Mail.Read, Mail.ReadWrite)",
        "Ensure OUTPUT_DIR exists or the application has permissions to create it",
        "Handle KeyboardInterrupt gracefully to allow users to cancel long-running operations",
        "The function uses max_results=50 for pagination; adjust based on expected email volume",
        "Monitor console output for progress updates and error messages",
        "Review the generated CSV register file for audit trail of downloaded attachments",
        "Implement proper secret management for CLIENT_SECRET (use environment variables or key vault)",
        "Consider implementing retry logic for network failures in the EmailSearchApp class methods"
      ],
      "class_interface": {},
      "complexity": "complex",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 22:21:05",
      "decorators": [],
      "dependencies": [
        "msal",
        "requests",
        "os",
        "base64",
        "csv",
        "typing",
        "datetime",
        "pathlib"
      ],
      "description": "Orchestrates an email search and PDF attachment download workflow using Microsoft Graph API, including authentication, email search, result display, and attachment processing.",
      "docstring": "Main execution function",
      "id": 1858,
      "imports": [
        "import os",
        "import base64",
        "import csv",
        "import msal",
        "import requests",
        "from typing import List",
        "from typing import Dict",
        "from typing import Optional",
        "from datetime import datetime",
        "from pathlib import Path"
      ],
      "imports_required": [
        "import os",
        "import base64",
        "import csv",
        "import msal",
        "import requests",
        "from typing import List, Dict, Optional",
        "from datetime import datetime",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 505,
      "line_start": 436,
      "name": "main_v13",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This is the main entry point for an email search application that connects to Microsoft 365, searches for emails based on sender and keyword criteria, displays results, downloads PDF attachments, and generates a download register. It handles the complete workflow from authentication through file download and metadata tracking.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects including console output, file downloads to OUTPUT_DIR, and creation of a CSV register file at REGISTER_FILE location.",
      "settings_required": [
        "TENANT_ID: Azure AD tenant identifier",
        "CLIENT_ID: Azure AD application (client) ID",
        "CLIENT_SECRET: Azure AD application client secret",
        "TARGET_MAILBOX: Email address of the mailbox to search",
        "SCOPES: List of Microsoft Graph API permission scopes (e.g., ['https://graph.microsoft.com/.default'])",
        "SENDER_EMAIL: Email address to filter search results by sender",
        "SEARCH_KEYWORD: Keyword to search for in email content",
        "OUTPUT_DIR: Directory path where PDF attachments will be saved",
        "REGISTER_FILE: File path for the CSV download register",
        "EmailSearchApp class must be defined with methods: authenticate(), search_emails(), display_email_list(), download_pdf_attachments(), save_download_register()"
      ],
      "source_code": "def main():\n    \"\"\"Main execution function\"\"\"\n    \n    # Initialize the application\n    app = EmailSearchApp(\n        tenant_id=TENANT_ID,\n        client_id=CLIENT_ID,\n        client_secret=CLIENT_SECRET,\n        target_mailbox=TARGET_MAILBOX\n    )\n    \n    try:\n        # Step 1: Authenticate user\n        app.authenticate(SCOPES)\n        \n        # Step 2: Search for emails\n        emails = app.search_emails(\n            sender=SENDER_EMAIL,\n            keyword=SEARCH_KEYWORD,\n            max_results=50  # Results per page\n        )\n        \n        # Step 3: Display results\n        app.display_email_list(emails)\n        \n        # Step 4: Download PDF attachments\n        if emails:\n            print(f\"\\n{'='*80}\")\n            print(\"Downloading PDF Attachments\")\n            print(f\"{'='*80}\\n\")\n            \n            all_download_records = []\n            \n            for idx, email in enumerate(emails, 1):\n                subject = email.get(\"subject\", \"(No Subject)\")\n                has_attachments = email.get(\"hasAttachments\", False)\n                \n                print(f\"[{idx}] Processing: {subject[:60]}...\")\n                \n                if not has_attachments:\n                    print(f\"  \u2298 No attachments\")\n                    continue\n                \n                # Download returns metadata for each file\n                download_metadata = app.download_pdf_attachments(\n                    email=email,\n                    output_dir=OUTPUT_DIR\n                )\n                \n                all_download_records.extend(download_metadata)\n            \n            # Save register\n            if all_download_records:\n                app.save_download_register(all_download_records, REGISTER_FILE)\n            \n            print(f\"\\n{'='*80}\")\n            print(f\"\u2713 Download completed!\")\n            print(f\"Total PDF files downloaded: 
{len(all_download_records)}\")\n            print(f\"Saved to: {os.path.abspath(OUTPUT_DIR)}\")\n            print(f\"Register: {os.path.abspath(REGISTER_FILE)}\")\n            print(f\"{'='*80}\")\n        \n        print(f\"\\n\u2713 Search completed successfully!\")\n        print(f\"Total emails found: {len(emails)}\")\n        \n    except KeyboardInterrupt:\n        print(\"\\n\\n\u2717 Operation cancelled by user\")\n    except Exception as e:\n        print(f\"\\n\u2717 Error occurred: {str(e)}\")\n        raise",
      "source_file": "/tf/active/vicechatdev/mailsearch/email_search_app.py",
      "tags": [
        "email-processing",
        "microsoft-graph",
        "oauth2",
        "attachment-download",
        "pdf-extraction",
        "workflow-orchestration",
        "file-management",
        "authentication",
        "api-integration",
        "batch-processing"
      ],
      "updated_at": "2025-12-07T01:59:48.484432",
      "usage_example": "# Configuration constants\nTENANT_ID = 'your-tenant-id'\nCLIENT_ID = 'your-client-id'\nCLIENT_SECRET = 'your-client-secret'\nTARGET_MAILBOX = 'user@example.com'\nSCOPES = ['https://graph.microsoft.com/.default']\nSENDER_EMAIL = 'sender@example.com'\nSEARCH_KEYWORD = 'invoice'\nOUTPUT_DIR = './downloads'\nREGISTER_FILE = './download_register.csv'\n\n# Ensure EmailSearchApp class is defined\n# from email_search_app import EmailSearchApp\n\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "This function expects parse_arguments() to be defined elsewhere in the module and return a properly structured args object",
        "Ensure the configuration file exists and contains all required sections (filecloud, logging, document_processing) before calling",
        "The function creates directories automatically but requires write permissions in the working directory",
        "Use --dry-run flag first to verify FileCloud connectivity and document discovery before running full analysis",
        "Monitor the logs directory for detailed execution logs, especially when debugging issues",
        "The function handles KeyboardInterrupt gracefully, allowing users to stop long-running analyses",
        "LLM usage statistics are only displayed if tokens were actually consumed during analysis",
        "Command-line arguments override configuration file settings, allowing flexible runtime customization",
        "The concurrent parameter controls parallelism; adjust based on system resources and API rate limits",
        "Exit codes follow Unix conventions: check return value for automation/scripting purposes"
      ],
      "class_interface": {},
      "complexity": "complex",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only when dry_run mode is enabled (args.dry_run is True)",
          "import": "from utils.filecloud_client import FileCloudClient",
          "optional": true
        }
      ],
      "created_at": "2025-12-06 10:17:24",
      "decorators": [],
      "dependencies": [
        "os",
        "sys",
        "argparse",
        "pathlib"
      ],
      "description": "Main entry point function for the Contract Validity Analyzer application that orchestrates configuration loading, logging setup, FileCloud connection, and contract analysis execution.",
      "docstring": "Main entry point.",
      "id": 396,
      "imports": [
        "import os",
        "import sys",
        "import argparse",
        "from pathlib import Path",
        "from config.config import Config",
        "from core.analyzer import ContractAnalyzer",
        "from utils.logging_utils import setup_logging",
        "from utils.logging_utils import get_logger",
        "from utils.filecloud_client import FileCloudClient"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "import argparse",
        "from pathlib import Path",
        "from config.config import Config",
        "from core.analyzer import ContractAnalyzer",
        "from utils.logging_utils import setup_logging",
        "from utils.logging_utils import get_logger"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 186,
      "line_start": 87,
      "name": "main_v12",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the primary entry point for a command-line application that analyzes contracts from FileCloud storage. It handles argument parsing, configuration management, logging initialization, optional dry-run mode for document discovery, and executes the full contract analysis pipeline with concurrent processing support. The function manages the complete lifecycle from initialization through execution to summary reporting, including error handling and graceful shutdown.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 for successful completion, 1 for errors (including KeyboardInterrupt, connection failures, or fatal exceptions). This follows standard Unix convention for process exit codes.",
      "settings_required": [
        "Configuration file (default or specified via --config argument) with sections: 'filecloud', 'logging', 'document_processing'",
        "parse_arguments() function must be defined to return args object with attributes: config, path, verbose, extensions, output_dir, concurrent, dry_run, max_files",
        "FileCloud credentials and connection settings in configuration file",
        "Write permissions for creating 'logs' directory and optional output directory",
        "ContractAnalyzer class must be available from core.analyzer module",
        "FileCloudClient class must be available from utils.filecloud_client module",
        "Config class must support methods: get_section(), set()",
        "Logging utilities must be available from utils.logging_utils"
      ],
      "source_code": "def main():\n    \"\"\"Main entry point.\"\"\"\n    args = parse_arguments()\n    \n    try:\n        # Load configuration\n        config = Config(args.config)\n        \n        # Override configuration with command line arguments\n        if args.path:\n            config.set('filecloud', {**config.get_section('filecloud'), 'base_path': args.path})\n        \n        if args.verbose:\n            config.set('logging', {**config.get_section('logging'), 'level': 'DEBUG'})\n        \n        if args.extensions:\n            extensions = [ext.strip() for ext in args.extensions.split(',')]\n            config.set('document_processing', {**config.get_section('document_processing'), 'supported_extensions': extensions})\n        \n        # Set up logging\n        log_dir = \"logs\"\n        if args.output_dir:\n            log_dir = os.path.join(args.output_dir, \"logs\")\n        \n        os.makedirs(log_dir, exist_ok=True)\n        setup_logging(config.get_section('logging'), log_dir)\n        \n        logger = get_logger(__name__)\n        logger.info(\"Starting Contract Validity Analyzer\")\n        logger.info(f\"Configuration: {config.config_path}\")\n        logger.info(f\"FileCloud path: {config.get_section('filecloud').get('base_path')}\")\n        logger.info(f\"Concurrent threads: {args.concurrent}\")\n        \n        # Create output directory if specified\n        if args.output_dir:\n            output_dir = os.path.join(args.output_dir, \"output\")\n            os.makedirs(output_dir, exist_ok=True)\n            config.set('output_dir', output_dir)\n        \n        # Dry run mode\n        if args.dry_run:\n            logger.info(\"DRY RUN MODE - Discovering documents without processing\")\n            \n            from utils.filecloud_client import FileCloudClient\n            \n            # Connect to FileCloud and list documents\n            fc_client = FileCloudClient(config.get_section('filecloud'))\n            if not 
fc_client.connect():\n                logger.error(\"Failed to connect to FileCloud\")\n                return 1\n            \n            documents = fc_client.search_documents()\n            fc_client.disconnect()\n            \n            if documents:\n                logger.info(f\"Found {len(documents)} documents to analyze:\")\n                for doc in documents:\n                    logger.info(f\"  - {doc['filename']} ({doc['size']} bytes)\")\n            else:\n                logger.warning(\"No documents found\")\n            \n            return 0\n        \n        # Initialize and run analyzer\n        analyzer = ContractAnalyzer(config.config)\n        \n        # Set up analysis parameters\n        analysis_kwargs = {'max_concurrent': args.concurrent}\n        if args.max_files:\n            analysis_kwargs['max_files'] = args.max_files\n        \n        results = analyzer.analyze_contracts(**analysis_kwargs)\n        \n        # Print summary\n        stats = analyzer.get_summary_stats()\n        if stats:\n            logger.info(\"=\" * 50)\n            logger.info(\"ANALYSIS SUMMARY\")\n            logger.info(\"=\" * 50)\n            for key, value in stats.items():\n                logger.info(f\"{key.replace('_', ' ').title()}: {value}\")\n            logger.info(\"=\" * 50)\n        \n        # Print LLM usage stats\n        llm_stats = analyzer.llm_client.get_usage_stats()\n        if llm_stats.get('total_tokens', 0) > 0:\n            logger.info(\"LLM Usage Statistics:\")\n            logger.info(f\"  Total tokens: {llm_stats['total_tokens']:,}\")\n            logger.info(f\"  Prompt tokens: {llm_stats['total_prompt_tokens']:,}\")\n            logger.info(f\"  Completion tokens: {llm_stats['total_completion_tokens']:,}\")\n        \n        logger.info(\"Analysis complete!\")\n        return 0\n        \n    except KeyboardInterrupt:\n        logger.info(\"Analysis interrupted by user\")\n        return 1\n    except Exception as 
e:\n        logger.error(f\"Fatal error: {e}\")\n        return 1",
      "source_file": "/tf/active/vicechatdev/contract_validity_analyzer/main.py",
      "tags": [
        "entry-point",
        "main-function",
        "cli-application",
        "contract-analysis",
        "filecloud",
        "configuration-management",
        "logging",
        "concurrent-processing",
        "document-processing",
        "dry-run",
        "error-handling",
        "orchestration"
      ],
      "updated_at": "2025-12-07T01:59:48.483430",
      "usage_example": "# This function is designed to be called as the main entry point of the application\n# Typically invoked from a script like:\n\nif __name__ == '__main__':\n    import sys\n    sys.exit(main())\n\n# Command line usage examples:\n# Basic run:\n# python script.py --config config.yaml\n\n# Dry run to discover documents:\n# python script.py --config config.yaml --dry-run\n\n# With custom settings:\n# python script.py --config config.yaml --path /contracts --verbose --concurrent 5 --max-files 100\n\n# With custom extensions:\n# python script.py --config config.yaml --extensions pdf,docx,txt --output-dir ./results"
    },
    {
      "best_practices": [
        "Always specify the --pattern argument as it is required for execution",
        "Use --start-date to filter data to relevant time periods and improve performance",
        "When using --pattern all, be aware that output filenames will be automatically modified with pattern suffixes",
        "Use --sample-size for testing or when working with large datasets to limit processing time",
        "Enable --skip-geocoding if coordinates are not needed to speed up processing",
        "Use --cache-only to avoid API rate limits when geocoding data that may already be cached",
        "Check return code (0 for success, 1 for error) when calling programmatically",
        "Ensure the PatternBasedExtractor class is properly defined and imported before calling main()",
        "The function prints detailed progress information to stdout, so redirect or capture if needed",
        "Handle early exits gracefully - function returns None if no mixed farms or patterns are found"
      ],
      "class_interface": {},
      "complexity": "complex",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only used in exception handling block when errors occur",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 08:22:13",
      "decorators": [],
      "dependencies": [
        "argparse",
        "os",
        "sys",
        "pandas",
        "numpy",
        "datetime",
        "typing",
        "traceback",
        "matched_sample_analysis",
        "extractor"
      ],
      "description": "Command-line interface function that orchestrates pattern-based extraction of poultry flock data, including data loading, pattern classification, geocoding, and export functionality.",
      "docstring": "Main function for pattern-based extraction.",
      "id": 70,
      "imports": [
        "import os",
        "import sys",
        "import pandas as pd",
        "import numpy as np",
        "import argparse",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Tuple",
        "from matched_sample_analysis import MatchedSampleAnalyzer",
        "from extractor import PehestatDataExtractor",
        "import traceback"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "import pandas as pd",
        "import numpy as np",
        "import argparse",
        "from datetime import datetime",
        "from typing import Dict, List, Optional, Tuple",
        "from matched_sample_analysis import MatchedSampleAnalyzer",
        "from extractor import PehestatDataExtractor",
        "import traceback"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 622,
      "line_start": 505,
      "name": "main_v11",
      "parameters": [],
      "parameters_explained": {
        "none": "This function takes no direct parameters. All configuration is handled through command-line arguments parsed via argparse, including: --pattern (required: sequential/concurrent/mixed/all), --output (CSV filename), --sample-size (number of flocks), --geocoded-data (path to geocoded data), --data-dir (Pehestat data directory), --skip-geocoding (flag), --cache-only (flag), --create-map (flag), --map-output (map filename), --use-clustering (flag), --start-date (YYYY-MM-DD format)"
      },
      "parent_class": null,
      "purpose": "This is the main entry point for a pattern-based poultry data extraction tool. It processes command-line arguments to extract flock data based on In-Ovo usage patterns (sequential, concurrent, mixed, or all), filters data by date, optionally performs geocoding and map generation, and exports results to CSV files. The function coordinates multiple extraction steps including data loading, mixed farm identification, pattern classification, data enrichment, and result export.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 for successful completion, 1 for error conditions. Returns None implicitly if no mixed farms or patterns are found (early exit scenarios).",
      "settings_required": [
        "Data directory containing Pehestat data files (default: /tf/active/pehestat_data, configurable via --data-dir)",
        "PatternBasedExtractor class must be available in the module scope",
        "Optional: Geocoded data file for coordinate enrichment (via --geocoded-data)",
        "Optional: Geocoding API credentials if not using --skip-geocoding or --cache-only flags",
        "File system write permissions for output CSV and map files"
      ],
      "source_code": "def main():\n    \"\"\"Main function for pattern-based extraction.\"\"\"\n    parser = argparse.ArgumentParser(description='Pattern-Based Poultry Data Extraction')\n    parser.add_argument('--pattern', type=str, required=True, \n                       choices=['sequential', 'concurrent', 'mixed', 'all'],\n                       help='In-Ovo usage pattern to extract')\n    parser.add_argument('--output', type=str, default=None,\n                       help='Output CSV filename (default: auto-generated)')\n    parser.add_argument('--sample-size', type=int, default=None,\n                       help='Number of flocks to sample (default: extract all)')\n    parser.add_argument('--geocoded-data', type=str, default=None,\n                       help='Path to geocoded data file for coordinate enrichment')\n    parser.add_argument('--data-dir', type=str, default='/tf/active/pehestat_data',\n                       help='Directory containing Pehestat data files')\n    parser.add_argument('--skip-geocoding', action='store_true',\n                       help='Skip geocoding and map generation')\n    parser.add_argument('--cache-only', action='store_true',\n                       help='Use geocoding cache only (no API calls)')\n    parser.add_argument('--create-map', action='store_true',\n                       help='Create interactive map (requires geocoding)')\n    parser.add_argument('--map-output', type=str, default=None,\n                       help='Output map filename (default: auto-generated)')\n    parser.add_argument('--use-clustering', action='store_true',\n                       help='Enable marker clustering on the map')\n    parser.add_argument('--start-date', type=str, default='2020-01-01',\n                       help='Start date filter (YYYY-MM-DD, default: 2020-01-01)')\n    \n    args = parser.parse_args()\n    \n    print(\"=\" * 80)\n    print(\"PATTERN-BASED POULTRY DATA EXTRACTION\")\n    print(\"=\" * 80)\n    print(f\"Target 
pattern: {args.pattern}\")\n    print(f\"Start date filter: {args.start_date}\")\n    print(f\"Sample size: {'All flocks' if args.sample_size is None else f'{args.sample_size:,} flocks'}\")\n    print(f\"Data directory: {args.data_dir}\")\n    if args.geocoded_data:\n        print(f\"Geocoded data: {args.geocoded_data}\")\n    if not args.skip_geocoding:\n        if args.cache_only:\n            print(\"Geocoding: Cache-only mode (no API calls)\")\n        else:\n            print(\"Geocoding: Full mode (includes API calls if needed)\")\n        if args.create_map:\n            print(\"Map generation: Enabled\")\n    else:\n        print(\"Geocoding: Disabled\")\n    print(\"=\" * 80)\n    \n    try:\n        # Initialize extractor\n        extractor = PatternBasedExtractor(\n            data_dir=args.data_dir,\n            geocoded_file=args.geocoded_data\n        )\n        \n        # Load and filter base data\n        flocks_df = extractor.load_and_filter_base_data(start_date=args.start_date)\n        \n        # Identify mixed farms\n        mixed_farms_df = extractor.identify_mixed_farms(flocks_df)\n        \n        if len(mixed_farms_df) == 0:\n            print(\"No mixed farms found! Cannot proceed with pattern extraction.\")\n            return\n        \n        # Classify farm patterns\n        patterns_df = extractor.classify_farm_patterns(flocks_df, mixed_farms_df)\n        \n        if len(patterns_df) == 0:\n            print(\"No farm patterns could be classified! 
Cannot proceed.\")\n            return\n        \n        # Extract flocks by pattern\n        if args.pattern == 'all':\n            # Extract all patterns\n            for pattern in ['sequential', 'concurrent', 'mixed']:\n                pattern_flocks = extractor.extract_flocks_by_pattern(\n                    pattern, flocks_df, patterns_df, args.sample_size\n                )\n                \n                if len(pattern_flocks) > 0:\n                    # Enrich data\n                    enriched_flocks = extractor.enrich_flock_data(pattern_flocks)\n                    \n                    # Export results\n                    output_file = args.output\n                    if output_file and args.pattern == 'all':\n                        # Modify filename for each pattern\n                        base, ext = os.path.splitext(output_file)\n                        output_file = f\"{base}_{pattern}{ext}\"\n                    \n                    extractor.export_results(enriched_flocks, pattern, output_file)\n        else:\n            # Extract specific pattern\n            pattern_flocks = extractor.extract_flocks_by_pattern(\n                args.pattern, flocks_df, patterns_df, args.sample_size\n            )\n            \n            if len(pattern_flocks) == 0:\n                print(f\"No flocks found for pattern '{args.pattern}'!\")\n                return\n            \n            # Enrich data\n            enriched_flocks = extractor.enrich_flock_data(pattern_flocks)\n            \n            # Export results\n            extractor.export_results(enriched_flocks, args.pattern, args.output)\n        \n        print(\"\\n\u2705 Pattern-based extraction completed successfully!\")\n        \n    except Exception as e:\n        print(f\"\\n\u274c Error during pattern-based extraction: {e}\")\n        import traceback\n        traceback.print_exc()\n        return 1\n    \n    return 0",
      "source_file": "/tf/active/vicechatdev/pattern_based_extraction.py",
      "tags": [
        "cli",
        "command-line-interface",
        "data-extraction",
        "poultry-data",
        "pattern-analysis",
        "geocoding",
        "data-processing",
        "csv-export",
        "map-generation",
        "argparse",
        "main-function",
        "entry-point",
        "flock-data",
        "in-ovo-patterns"
      ],
      "updated_at": "2025-12-07T01:59:48.482310",
      "usage_example": "# Run from command line:\n# Extract sequential pattern flocks from 2020 onwards\npython script.py --pattern sequential --start-date 2020-01-01 --output sequential_flocks.csv\n\n# Extract all patterns with sampling and geocoding\npython script.py --pattern all --sample-size 1000 --geocoded-data geocoded.csv --create-map\n\n# Extract concurrent pattern without geocoding\npython script.py --pattern concurrent --skip-geocoding --output concurrent_only.csv\n\n# Extract mixed pattern with cache-only geocoding and clustering map\npython script.py --pattern mixed --cache-only --create-map --use-clustering --map-output mixed_map.html\n\n# Programmatic usage (if called from Python):\nif __name__ == '__main__':\n    sys.exit(main())"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point of the script using if __name__ == '__main__': main()",
        "Ensure DocumentAnalyzer class is properly defined before calling this function",
        "Configure logging before calling main() to capture all log messages",
        "The function expects a CSV register file with specific format - ensure compatibility",
        "Use --limit parameter during testing to avoid processing large document sets",
        "Ensure sufficient disk space in output directory for results",
        "Handle keyboard interrupts gracefully if processing large batches",
        "The function will raise exceptions on fatal errors - wrap in try-except if calling programmatically",
        "Verify all system dependencies (Tesseract, poppler) are installed before running",
        "Set appropriate OpenAI API credentials before execution"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported inside the function body, only when main() is called",
          "import": "import argparse",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 22:16:33",
      "decorators": [],
      "dependencies": [
        "argparse",
        "logging",
        "csv",
        "json",
        "pathlib",
        "datetime",
        "typing",
        "numpy",
        "pdf2image",
        "pytesseract",
        "easyocr",
        "PIL",
        "openai"
      ],
      "description": "Command-line interface function that orchestrates PDF document analysis using OCR and LLM processing, with configurable input/output paths and processing limits.",
      "docstring": "Main execution function",
      "id": 1845,
      "imports": [
        "import os",
        "import sys",
        "import csv",
        "import json",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "import logging",
        "import numpy as np",
        "from pdf2image import convert_from_path",
        "import pytesseract",
        "import easyocr",
        "from PIL import Image",
        "from openai import OpenAI",
        "import argparse",
        "import re"
      ],
      "imports_required": [
        "import argparse",
        "import logging",
        "import csv",
        "import json",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict, List, Optional, Any",
        "import numpy as np",
        "from pdf2image import convert_from_path",
        "import pytesseract",
        "import easyocr",
        "from PIL import Image",
        "from openai import OpenAI"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 617,
      "line_start": 563,
      "name": "main_v10",
      "parameters": [],
      "parameters_explained": {
        "none": "This function takes no direct parameters. It uses argparse to parse command-line arguments: --register (path to CSV file containing document registry, default './output/download_register.csv'), --limit (optional integer to limit number of documents processed for testing), and --output-dir (directory for saving results, default './output')"
      },
      "parent_class": null,
      "purpose": "Serves as the main entry point for a document analysis application that reads PDF files from a download register, processes them using a DocumentAnalyzer class (which performs OCR and LLM analysis), and saves structured results. Designed for batch processing of PDF documents with progress tracking and error handling.",
      "return_annotation": null,
      "return_explained": "Returns None. The function performs side effects including printing status messages to stdout, processing documents through DocumentAnalyzer, and saving results to files. May raise exceptions on fatal errors.",
      "settings_required": [
        "DocumentAnalyzer class must be defined and available in the same module or imported",
        "logger object must be configured and available in the module scope",
        "OpenAI API key must be configured (likely via environment variable OPENAI_API_KEY)",
        "Tesseract OCR must be installed on the system for pytesseract",
        "poppler-utils must be installed for pdf2image to work",
        "Input CSV register file must exist at specified path with expected format",
        "Output directory must be writable or creatable",
        "PDF files referenced in the register must be accessible"
      ],
      "source_code": "def main():\n    \"\"\"Main execution function\"\"\"\n    import argparse\n    \n    parser = argparse.ArgumentParser(description=\"Analyze downloaded PDF documents\")\n    parser.add_argument(\n        '--register',\n        default='./output/download_register.csv',\n        help='Path to download register CSV'\n    )\n    parser.add_argument(\n        '--limit',\n        type=int,\n        default=None,\n        help='Limit number of documents to process (for testing)'\n    )\n    parser.add_argument(\n        '--output-dir',\n        default='./output',\n        help='Output directory for results'\n    )\n    \n    args = parser.parse_args()\n    \n    print(f\"\\n{'='*80}\")\n    print(\"Document Analyzer - PDF Analysis with OCR and LLM\")\n    print(f\"{'='*80}\\n\")\n    \n    try:\n        # Initialize analyzer\n        analyzer = DocumentAnalyzer(output_dir=args.output_dir)\n        \n        # Process documents\n        results = analyzer.process_documents_from_register(\n            register_path=args.register,\n            limit=args.limit\n        )\n        \n        # Save results\n        analyzer.save_results(results)\n        \n        # Summary\n        successful = sum(1 for r in results if r['success'])\n        failed = len(results) - successful\n        \n        print(f\"\\n{'='*80}\")\n        print(f\"Processing Complete!\")\n        print(f\"  Total documents: {len(results)}\")\n        print(f\"  Successful: {successful}\")\n        print(f\"  Failed: {failed}\")\n        print(f\"{'='*80}\\n\")\n        \n    except Exception as e:\n        logger.error(f\"Fatal error: {e}\")\n        raise",
      "source_file": "/tf/active/vicechatdev/mailsearch/document_analyzer.py",
      "tags": [
        "cli",
        "command-line",
        "entry-point",
        "pdf-processing",
        "ocr",
        "llm",
        "document-analysis",
        "batch-processing",
        "argparse",
        "main-function",
        "orchestration",
        "error-handling"
      ],
      "updated_at": "2025-12-07T01:59:48.481152",
      "usage_example": "# Run from command line with default settings:\n# python script.py\n\n# Run with custom register path and limit:\n# python script.py --register /path/to/register.csv --limit 10 --output-dir /path/to/output\n\n# In Python code (if calling directly):\nif __name__ == '__main__':\n    main()\n\n# The function expects to be run as a script entry point and will:\n# 1. Parse command-line arguments\n# 2. Initialize DocumentAnalyzer with output directory\n# 3. Process documents from the register CSV\n# 4. Save results and print summary statistics"
    },
    {
      "best_practices": [
        "This function must be run using asyncio.run(main()) or equivalent async event loop",
        "Ensure all required configuration settings are properly set before calling this function",
        "The function registers signal handlers which may interfere with other signal handling in the application - avoid multiple signal handler registrations",
        "The SMTPServer class must implement stop() and run_forever() methods for proper operation",
        "Logging should be configured to handle concurrent async operations if the SMTP server processes multiple connections",
        "The function calls sys.exit() which terminates the entire process - ensure all cleanup is handled in the finally block or signal handlers",
        "On Windows, SIGTERM may not be available - consider platform-specific signal handling if cross-platform support is needed",
        "The print_banner() function is called but not imported in the provided imports list - ensure this function is available in scope",
        "Consider implementing a timeout mechanism for smtp_server.run_forever() to prevent indefinite hanging",
        "The signal_handler function is synchronous but calls smtp_server.stop() - ensure stop() is safe to call from signal handlers"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 17:40:59",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "logging",
        "sys",
        "signal"
      ],
      "description": "Asynchronous main entry point function that initializes and runs an email forwarding SMTP server with logging, configuration validation, and graceful shutdown handling.",
      "docstring": "Main application entry point.",
      "id": 1491,
      "imports": [
        "import asyncio",
        "import logging",
        "import sys",
        "import signal",
        "from datetime import datetime",
        "from config import settings",
        "from utils.logger import setup_logging",
        "from forwarder.smtp_server import SMTPServer"
      ],
      "imports_required": [
        "import asyncio",
        "import logging",
        "import sys",
        "import signal",
        "from datetime import datetime",
        "from config import settings",
        "from utils.logger import setup_logging",
        "from forwarder.smtp_server import SMTPServer"
      ],
      "is_async": 1,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 72,
      "line_start": 36,
      "name": "main_v9",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the primary application entry point for an email forwarding service. It orchestrates the complete lifecycle of the application including: setting up logging infrastructure, validating configuration settings, creating and starting an SMTP server instance, registering signal handlers for graceful shutdown on SIGINT/SIGTERM signals, and handling fatal errors with appropriate cleanup. The function is designed to run indefinitely until interrupted by signals or fatal errors.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It runs indefinitely until interrupted by a signal (SIGINT/SIGTERM) or a fatal exception occurs, at which point it exits the process with status code 0 (graceful shutdown) or 1 (error condition).",
      "settings_required": [
        "config module with settings object that has validate_config() method",
        "utils.logger module with setup_logging() function",
        "forwarder.smtp_server module with SMTPServer class",
        "Configuration settings accessible via settings object (specific settings depend on validate_config() implementation)",
        "Appropriate permissions to bind to SMTP ports (typically port 25, 587, or custom port)",
        "Signal handling support (SIGINT and SIGTERM) - standard on Unix-like systems"
      ],
      "source_code": "async def main():\n    \"\"\"Main application entry point.\"\"\"\n    \n    # Set up logging\n    setup_logging()\n    \n    # Print banner\n    print_banner()\n    \n    try:\n        # Validate configuration\n        settings.validate_config()\n        logger.info(\"Configuration validated successfully\")\n        \n        # Create and start SMTP server\n        smtp_server = SMTPServer()\n        \n        # Set up graceful shutdown\n        def signal_handler(signum, frame):\n            logger.info(f\"Received signal {signum}, initiating shutdown...\")\n            smtp_server.stop()\n            sys.exit(0)\n            \n        signal.signal(signal.SIGINT, signal_handler)\n        signal.signal(signal.SIGTERM, signal_handler)\n        \n        # Start server\n        logger.info(\"Starting Email Forwarder service...\")\n        await smtp_server.run_forever()\n        \n    except KeyboardInterrupt:\n        logger.info(\"Received keyboard interrupt, shutting down...\")\n    except Exception as e:\n        logger.error(f\"Fatal error: {e}\")\n        sys.exit(1)\n    finally:\n        logger.info(\"Email Forwarder service stopped\")",
      "source_file": "/tf/active/vicechatdev/email-forwarder/src/main.py",
      "tags": [
        "async",
        "smtp",
        "email",
        "server",
        "entry-point",
        "signal-handling",
        "graceful-shutdown",
        "logging",
        "configuration",
        "daemon",
        "service",
        "forwarder",
        "main-function"
      ],
      "updated_at": "2025-12-07T01:59:48.480032",
      "usage_example": "import asyncio\nimport logging\nimport sys\nimport signal\nfrom datetime import datetime\nfrom config import settings\nfrom utils.logger import setup_logging\nfrom forwarder.smtp_server import SMTPServer\n\nasync def main():\n    \"\"\"Main application entry point.\"\"\"\n    setup_logging()\n    print_banner()\n    try:\n        settings.validate_config()\n        logger.info(\"Configuration validated successfully\")\n        smtp_server = SMTPServer()\n        def signal_handler(signum, frame):\n            logger.info(f\"Received signal {signum}, initiating shutdown...\")\n            smtp_server.stop()\n            sys.exit(0)\n        signal.signal(signal.SIGINT, signal_handler)\n        signal.signal(signal.SIGTERM, signal_handler)\n        logger.info(\"Starting Email Forwarder service...\")\n        await smtp_server.run_forever()\n    except KeyboardInterrupt:\n        logger.info(\"Received keyboard interrupt, shutting down...\")\n    except Exception as e:\n        logger.error(f\"Fatal error: {e}\")\n        sys.exit(1)\n    finally:\n        logger.info(\"Email Forwarder service stopped\")\n\nif __name__ == \"__main__\":\n    asyncio.run(main())"
    },
    {
      "best_practices": [
        "Ensure proper logging configuration before calling this function to capture all import activities",
        "Verify that an admin user exists in the system before running the import",
        "The function performs duplicate checking using both cdoc_uid (preferred) and doc_number (fallback) to prevent duplicate imports",
        "Documents with cdoc_uid metadata take precedence over doc_number for duplicate detection",
        "Monitor the import summary logs to track success rates and identify issues",
        "Handle FileCloud connection errors gracefully - the function will log errors but continue processing remaining documents",
        "Ensure sufficient disk space and memory for processing large document sets",
        "Run this function in a controlled environment as it performs database writes and file operations",
        "Consider implementing rate limiting if importing large numbers of documents to avoid overwhelming FileCloud API",
        "Review failed_count in the summary to identify and address systematic import issues"
      ],
      "class_interface": {},
      "complexity": "complex",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 13:37:15",
      "decorators": [],
      "dependencies": [
        "os",
        "sys",
        "logging",
        "tempfile",
        "uuid",
        "io",
        "typing",
        "datetime",
        "CDocs.db.db_operations",
        "CDocs.models.document",
        "CDocs.models.user_extensions",
        "CDocs.controllers.filecloud_controller",
        "CDocs.controllers.document_controller",
        "CDocs.config",
        "FC_api",
        "metadata_catalog",
        "traceback"
      ],
      "description": "Main execution function that orchestrates the import of controlled documents from FileCloud into a Neo4j database, checking for duplicates and managing document metadata.",
      "docstring": "Main execution function",
      "id": 852,
      "imports": [
        "import os",
        "import sys",
        "import logging",
        "import tempfile",
        "import uuid",
        "import io",
        "from typing import Dict",
        "from typing import List",
        "from typing import Any",
        "from typing import Optional",
        "from datetime import datetime",
        "from CDocs.db import db_operations as db",
        "from CDocs.models.document import ControlledDocument",
        "from CDocs.models.document import DocumentVersion",
        "from CDocs.models.user_extensions import DocUser",
        "from CDocs.controllers.filecloud_controller import get_filecloud_client",
        "from CDocs.controllers.filecloud_controller import upload_document_to_filecloud",
        "from CDocs.controllers.filecloud_controller import get_filecloud_document_path",
        "from CDocs.controllers.filecloud_controller import ensure_document_folders",
        "from CDocs.controllers.filecloud_controller import FileCloudError",
        "from CDocs.controllers.document_controller import create_document_version",
        "from CDocs.config import settings",
        "from FC_api import FileCloudAPI",
        "from CDocs.controllers.document_controller import set_current_version",
        "from CDocs.controllers.filecloud_controller import upload_document_to_filecloud",
        "from metadata_catalog import MetadataCatalog",
        "import traceback",
        "import traceback",
        "import traceback",
        "import traceback"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "import logging",
        "import tempfile",
        "import uuid",
        "import io",
        "from typing import Dict, List, Any, Optional",
        "from datetime import datetime",
        "from CDocs.db import db_operations as db",
        "from CDocs.models.document import ControlledDocument, DocumentVersion",
        "from CDocs.models.user_extensions import DocUser",
        "from CDocs.controllers.filecloud_controller import get_filecloud_client, upload_document_to_filecloud, get_filecloud_document_path, ensure_document_folders, FileCloudError",
        "from CDocs.controllers.document_controller import create_document_version, set_current_version",
        "from CDocs.config import settings",
        "from FC_api import FileCloudAPI",
        "from metadata_catalog import MetadataCatalog",
        "import traceback"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 565,
      "line_start": 483,
      "name": "main_v8",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the entry point for a document import script that searches FileCloud for controlled documents, validates them against existing records in Neo4j (by UID or document number), and imports new documents while tracking success/failure statistics. It handles duplicate detection, error logging, and provides a comprehensive summary of the import operation.",
      "return_annotation": null,
      "return_explained": "Returns None. The function performs side effects (importing documents, logging results) rather than returning a value. Success/failure information is logged and tracked internally through counters (imported_count, skipped_count, failed_count).",
      "settings_required": [
        "Logger instance named 'logger' must be configured before calling this function",
        "get_admin_user() function must be available and return a DocUser object",
        "search_filecloud_for_documents() function must be available to search FileCloud",
        "check_document_exists_by_uid() function must be available to check Neo4j by UID",
        "check_document_exists_by_doc_number() function must be available to check Neo4j by document number",
        "import_document_from_filecloud() function must be available to perform the actual import",
        "FileCloud API credentials and connection settings must be configured in CDocs.config.settings",
        "Neo4j database connection must be configured and accessible through CDocs.db.db_operations",
        "Admin user must exist in the system for document creation"
      ],
      "source_code": "def main():\n    \"\"\"Main execution function\"\"\"\n    try:\n        logger.info(\"Starting FileCloud document import script\")\n        \n        # Get admin user for document creation\n        admin_user = get_admin_user()\n        if not admin_user:\n            logger.error(\"Cannot proceed without an admin user\")\n            return\n        \n        # Search for controlled documents in FileCloud\n        documents = search_filecloud_for_documents()\n        if not documents:\n            logger.info(\"No documents found to import\")\n            return\n            \n        # Process found documents\n        imported_count = 0\n        skipped_count = 0\n        failed_count = 0\n        \n        for doc in documents:\n            file_path = doc.get('file_path')\n            metadata = doc.get('metadata', {})\n            \n            # First check if document has a cdoc_uid in metadata\n            cdoc_uid = metadata.get('cdoc_uid')\n            if cdoc_uid:\n                logger.info(f\"Found document with cdoc_uid: {cdoc_uid}\")\n                # Check if this document exists in Neo4j by UID\n                existing_doc = check_document_exists_by_uid(cdoc_uid)\n                if existing_doc:\n                    logger.info(f\"Document with UID {cdoc_uid} is already managed in Neo4j - skipping\")\n                    skipped_count += 1\n                    continue\n                    \n                # If we have a cdoc_uid but it's not in Neo4j, this means the document \n                # was meant to be managed but isn't - import it with that ID\n                logger.info(f\"Document with UID {cdoc_uid} not found in Neo4j but has cdoc_uid - will import\")\n            \n            # Then check by document number as fallback\n            doc_number = metadata.get('doc_number')\n            if doc_number and not file_path:\n                logger.warning(f\"Skipping document with missing file_path: 
{doc_number}\")\n                skipped_count += 1\n                continue\n            \n            if doc_number and not cdoc_uid:\n                # Skip if document already exists in database by number\n                existing_doc = check_document_exists_by_doc_number(doc_number)\n                if existing_doc:\n                    logger.info(f\"Document {doc_number} already exists in database by number - skipping\")\n                    skipped_count += 1\n                    continue\n            \n            # Import document\n            logger.info(\"file path: \" + file_path)\n            logger.info(\"metadata: \" + str(metadata))\n            logger.info(\"admin_user: \" + str(admin_user.name))\n            result = import_document_from_filecloud(file_path, metadata, admin_user)\n            #result=None\n            \n            if result and result.get('success', False):\n                imported_count += 1\n                logger.info(f\"Successfully imported document: {result.get('doc_number')}\")\n            else:\n                failed_count += 1\n                error_msg = result.get('message') if result else \"Unknown error\"\n                logger.error(f\"Failed to import document: {error_msg}\")\n        \n        # Report summary\n        logger.info(\"===== Import Summary =====\")\n        logger.info(f\"Total documents found in FileCloud: {len(documents)}\")\n        logger.info(f\"Documents imported: {imported_count}\")\n        logger.info(f\"Documents skipped (already exist): {skipped_count}\")\n        logger.info(f\"Documents failed to import: {failed_count}\")\n        logger.info(\"=========================\")\n        \n    except Exception as e:\n        logger.error(f\"Error in main execution: {e}\")\n        import traceback\n        logger.error(traceback.format_exc())",
      "source_file": "/tf/active/vicechatdev/CDocs/FC_sync.py",
      "tags": [
        "document-management",
        "filecloud",
        "neo4j",
        "import",
        "batch-processing",
        "controlled-documents",
        "duplicate-detection",
        "orchestration",
        "logging",
        "error-handling",
        "database-sync"
      ],
      "updated_at": "2025-12-07T01:59:48.478577",
      "usage_example": "# Ensure logger is configured\nimport logging\nlogger = logging.getLogger(__name__)\nlogger.setLevel(logging.INFO)\nhandler = logging.StreamHandler()\nformatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')\nhandler.setFormatter(formatter)\nlogger.addHandler(handler)\n\n# Ensure all required helper functions are defined:\n# - get_admin_user()\n# - search_filecloud_for_documents()\n# - check_document_exists_by_uid(cdoc_uid)\n# - check_document_exists_by_doc_number(doc_number)\n# - import_document_from_filecloud(file_path, metadata, admin_user)\n\n# Execute the main import process\nif __name__ == '__main__':\n    main()\n    # Output will be logged showing:\n    # - Documents found in FileCloud\n    # - Import progress for each document\n    # - Final summary with counts of imported/skipped/failed documents"
    },
    {
      "best_practices": [
        "Ensure the Config class is properly configured before calling this function",
        "Set up all required environment variables (API keys, credentials) before execution",
        "Verify that the logging configuration section exists in the config file to avoid setup_logging errors",
        "The function uses sys.exit(1) on errors, so it should only be called from the main execution context, not from library code",
        "Monitor the logger output for analysis progress and any error messages",
        "Ensure ContractAnalyzer class is imported and available in the module scope",
        "The function expects logger to be available globally after setup_logging is called",
        "Consider wrapping the call to main() in a if __name__ == '__main__' block for proper module execution",
        "Fatal errors are logged before exit, so ensure logging is configured to capture these messages",
        "The function does not accept command-line arguments; all configuration must be in the config file"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "Required by ContractAnalyzer if it needs to access cloud storage for contract documents",
          "import": "from utils.filecloud_client import FileCloudClient",
          "optional": false
        },
        {
          "condition": "Required by ContractAnalyzer for processing contract documents",
          "import": "from utils.document_processor import DocumentProcessor",
          "optional": false
        },
        {
          "condition": "Required by ContractAnalyzer for LLM-based contract analysis",
          "import": "from utils.llm_client import LLMClient",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 10:21:25",
      "decorators": [],
      "dependencies": [
        "logging",
        "sys",
        "csv",
        "json",
        "pandas",
        "typing",
        "datetime",
        "time",
        "concurrent.futures",
        "pathlib"
      ],
      "description": "Main entry point function that orchestrates the contract validity analysis workflow by loading configuration, setting up logging, initializing the analyzer, running analysis, and reporting results.",
      "docstring": "Main entry point for the analyzer.",
      "id": 409,
      "imports": [
        "import logging",
        "import os",
        "import sys",
        "import csv",
        "import json",
        "import pandas as pd",
        "from typing import Dict",
        "from typing import List",
        "from typing import Any",
        "from typing import Optional",
        "from datetime import datetime",
        "import time",
        "import concurrent.futures",
        "from pathlib import Path",
        "from config.config import Config",
        "from utils.filecloud_client import FileCloudClient",
        "from utils.document_processor import DocumentProcessor",
        "from utils.llm_client import LLMClient",
        "from utils.logging_utils import setup_logging",
        "from utils.logging_utils import get_logger",
        "from utils.logging_utils import PerformanceLogger",
        "from utils.logging_utils import ProgressLogger",
        "from datetime import datetime",
        "from datetime import date"
      ],
      "imports_required": [
        "import logging",
        "import sys",
        "from config.config import Config",
        "from utils.logging_utils import setup_logging",
        "from utils.logging_utils import get_logger"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 595,
      "line_start": 568,
      "name": "main_v7",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the primary execution entry point for a contract validity analysis application. It coordinates the entire analysis pipeline: loads application configuration, initializes logging infrastructure, creates and runs a ContractAnalyzer instance to process contracts, retrieves and logs summary statistics, and handles fatal errors with appropriate exit codes. It's designed to be called when the application starts, typically from a if __name__ == '__main__' block.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It either completes successfully with logged output or exits the program with sys.exit(1) on fatal errors.",
      "settings_required": [
        "Configuration file accessible by Config class (typically config.yaml or config.json)",
        "Logging configuration section in the config file with appropriate settings",
        "ContractAnalyzer configuration parameters in the config file",
        "Environment variables or API keys required by LLMClient (e.g., OPENAI_API_KEY, ANTHROPIC_API_KEY)",
        "File system access permissions for reading contracts and writing logs",
        "Network access if FileCloudClient needs to connect to cloud storage"
      ],
      "source_code": "def main():\n    \"\"\"Main entry point for the analyzer.\"\"\"\n    try:\n        # Load configuration\n        config_manager = Config()\n        config = config_manager.config\n        \n        # Set up logging\n        setup_logging(config.get_section('logging'))\n        \n        logger.info(\"Starting Contract Validity Analyzer\")\n        \n        # Initialize and run analyzer\n        analyzer = ContractAnalyzer(config)\n        results = analyzer.analyze_contracts()\n        \n        # Print summary\n        stats = analyzer.get_summary_stats()\n        if stats:\n            logger.info(\"Analysis Summary:\")\n            for key, value in stats.items():\n                logger.info(f\"  {key}: {value}\")\n        \n        logger.info(\"Analysis complete!\")\n        \n    except Exception as e:\n        logger.error(f\"Fatal error: {e}\")\n        sys.exit(1)",
      "source_file": "/tf/active/vicechatdev/contract_validity_analyzer/core/analyzer.py",
      "tags": [
        "entry-point",
        "orchestration",
        "contract-analysis",
        "workflow",
        "configuration",
        "logging",
        "error-handling",
        "main-function",
        "analyzer",
        "batch-processing"
      ],
      "updated_at": "2025-12-07T01:59:48.476989",
      "usage_example": "if __name__ == '__main__':\n    # Ensure config file exists at expected location\n    # e.g., config/config.yaml with sections for 'logging' and analyzer settings\n    # Set required environment variables:\n    # export OPENAI_API_KEY='your-api-key'\n    \n    # Import the ContractAnalyzer class\n    from analyzer.contract_analyzer import ContractAnalyzer\n    \n    # Call main function\n    main()\n    \n    # The function will:\n    # 1. Load configuration from config file\n    # 2. Set up logging based on config\n    # 3. Initialize ContractAnalyzer\n    # 4. Run contract analysis\n    # 5. Log summary statistics\n    # 6. Exit with code 0 on success or 1 on error"
    },
    {
      "best_practices": [
        "This function uses __file__ to locate the database file, so it must be run as part of a Python script file, not in an interactive interpreter",
        "The function expects a specific directory structure relative to the script location; ensure 'remarkable_replica_v2/replica_database.json' exists",
        "The function prints extensive diagnostic output; redirect stdout if you need to capture this information programmatically",
        "The root-level detection logic checks for empty strings, None values, and empty string literals - this redundancy ensures compatibility with different data formats",
        "Node types are converted to numeric codes (1 for folders, 4 for other types) which appears to be a specific encoding scheme for the reMarkable system",
        "Consider wrapping this function in error handling for production use, as it assumes the JSON file exists and has the expected structure"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:59:32",
      "decorators": [],
      "dependencies": [
        "json",
        "pathlib"
      ],
      "description": "A test function that analyzes a reMarkable tablet replica database JSON file to identify and list all root-level entries (folders and documents without parent nodes).",
      "docstring": "Test finding root-level entries",
      "id": 2133,
      "imports": [
        "import json",
        "from pathlib import Path"
      ],
      "imports_required": [
        "import json",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 49,
      "line_start": 6,
      "name": "test_root_finding",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function is designed to test and debug the identification of root-level nodes in a reMarkable tablet's file system structure. It reads a JSON database file, iterates through all nodes, checks both 'parent_uuid' and 'metadata.parent' fields to determine if a node is at the root level, and prints detailed diagnostic information about each node. It's useful for understanding the structure of reMarkable's file system and verifying root-level entry detection logic.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It produces side effects by printing diagnostic information to stdout, including details about each node analyzed and a summary of root-level entries found.",
      "settings_required": [
        "Requires a specific file structure: a 'remarkable_replica_v2' directory in the same directory as the script, containing a 'replica_database.json' file",
        "The JSON database file must follow the expected schema with 'nodes' as a top-level key, where each node contains fields like 'parent_uuid', 'metadata', 'name', 'node_type', 'hash', and 'size'"
      ],
      "source_code": "def test_root_finding():\n    \"\"\"Test finding root-level entries\"\"\"\n    \n    database_path = Path(__file__).parent / \"remarkable_replica_v2\" / \"replica_database.json\"\n    \n    with open(database_path, 'r') as f:\n        database = json.load(f)\n    \n    print(\"\ud83d\udd0d Analyzing root-level nodes...\")\n    print(f\"\ud83d\udcca Total nodes: {len(database['nodes'])}\")\n    \n    root_entries = []\n    for uuid, node in database['nodes'].items():\n        # Check both parent_uuid field and metadata.parent field\n        parent_uuid = node.get('parent_uuid')\n        metadata_parent = node.get('metadata', {}).get('parent', '')\n        \n        print(f\"\\n\ud83d\udd39 Node: {uuid}\")\n        print(f\"   Name: {node.get('name', 'Unknown')}\")\n        print(f\"   Type: {node.get('node_type', 'Unknown')}\")\n        print(f\"   parent_uuid: {repr(parent_uuid)}\")\n        print(f\"   metadata.parent: {repr(metadata_parent)}\")\n        \n        # A node is root-level if both parent fields indicate no parent\n        is_root_level = (\n            (parent_uuid is None or parent_uuid == '' or parent_uuid == \"\") and\n            (metadata_parent == '' or metadata_parent is None)\n        )\n        \n        print(f\"   Is root-level: {is_root_level}\")\n        \n        if is_root_level:\n            node_type = 1 if node['node_type'] == 'folder' else 4\n            root_entries.append({\n                'hash': node['hash'],\n                'uuid': uuid,\n                'node_type': node_type,\n                'size': node.get('size', 0),\n                'name': node.get('name', 'Unknown')\n            })\n    \n    print(f\"\\n\ud83d\udcca Found {len(root_entries)} root-level items:\")\n    for entry in root_entries:\n        print(f\"   - {entry['name']} ({entry['uuid'][:8]}...): type={entry['node_type']}, size={entry['size']}\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/debug_root.py",
      "tags": [
        "testing",
        "debugging",
        "file-system",
        "remarkable-tablet",
        "json-parsing",
        "tree-structure",
        "root-detection",
        "diagnostic",
        "data-analysis"
      ],
      "updated_at": "2025-12-07T01:59:32.125959",
      "usage_example": "# Ensure the required directory structure exists:\n# ./remarkable_replica_v2/replica_database.json\n\nimport json\nfrom pathlib import Path\n\ndef test_root_finding():\n    \"\"\"Test finding root-level entries\"\"\"\n    database_path = Path(__file__).parent / \"remarkable_replica_v2\" / \"replica_database.json\"\n    with open(database_path, 'r') as f:\n        database = json.load(f)\n    print(\"\ud83d\udd0d Analyzing root-level nodes...\")\n    print(f\"\ud83d\udcca Total nodes: {len(database['nodes'])}\")\n    root_entries = []\n    for uuid, node in database['nodes'].items():\n        parent_uuid = node.get('parent_uuid')\n        metadata_parent = node.get('metadata', {}).get('parent', '')\n        print(f\"\\n\ud83d\udd39 Node: {uuid}\")\n        print(f\"   Name: {node.get('name', 'Unknown')}\")\n        print(f\"   Type: {node.get('node_type', 'Unknown')}\")\n        print(f\"   parent_uuid: {repr(parent_uuid)}\")\n        print(f\"   metadata.parent: {repr(metadata_parent)}\")\n        is_root_level = (\n            (parent_uuid is None or parent_uuid == '' or parent_uuid == \"\") and\n            (metadata_parent == '' or metadata_parent is None)\n        )\n        print(f\"   Is root-level: {is_root_level}\")\n        if is_root_level:\n            node_type = 1 if node['node_type'] == 'folder' else 4\n            root_entries.append({\n                'hash': node['hash'],\n                'uuid': uuid,\n                'node_type': node_type,\n                'size': node.get('size', 0),\n                'name': node.get('name', 'Unknown')\n            })\n    print(f\"\\n\ud83d\udcca Found {len(root_entries)} root-level items:\")\n    for entry in root_entries:\n        print(f\"   - {entry['name']} ({entry['uuid'][:8]}...): type={entry['node_type']}, size={entry['size']}\")\n\n# Run the test\ntest_root_finding()"
    },
    {
      "best_practices": [
        "This function expects to be called as a script entry point with sys.argv available",
        "Requires companion functions (load_database, print_database_analysis, print_sync_info) to be defined in the same module",
        "Uses Path objects for cross-platform file path handling",
        "Provides user-friendly error messages with emoji indicators for better CLI experience",
        "Returns boolean for exit code handling - use sys.exit(0 if main() else 1) pattern",
        "Command-line argument at index 1 should be a valid directory path to a reMarkable replica",
        "The function performs validation before processing to fail fast on invalid inputs"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:59:09",
      "decorators": [],
      "dependencies": [],
      "description": "Main entry point function that analyzes a reMarkable tablet replica directory by loading its database, printing analysis results, and displaying sync information.",
      "docstring": "Main entry point",
      "id": 2132,
      "imports": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "imports_required": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 283,
      "line_start": 260,
      "name": "main_v66",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the command-line interface entry point for analyzing a reMarkable tablet's local replica directory. It accepts an optional directory path as a command-line argument, validates the directory exists, loads the database from it, and prints comprehensive analysis including database contents and sync information. It's designed to be called when the script is executed directly.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the analysis completed successfully (directory exists, database loaded, and analysis printed), False if the replica directory was not found or the database failed to load.",
      "settings_required": [
        "Requires a reMarkable tablet replica directory structure to exist on the filesystem",
        "Expects helper functions to be defined in the same module: load_database(), print_database_analysis(), and print_sync_info()",
        "Default replica directory path is './remarkable_complete_replica' if not specified via command-line argument"
      ],
      "source_code": "def main():\n    \"\"\"Main entry point\"\"\"\n    if len(sys.argv) > 1:\n        replica_dir = Path(sys.argv[1])\n    else:\n        replica_dir = Path.cwd() / \"remarkable_complete_replica\"\n    \n    if not replica_dir.exists():\n        print(f\"\u274c Replica directory not found: {replica_dir}\")\n        print(f\"\ud83d\udca1 Usage: python {sys.argv[0]} [replica_directory]\")\n        return False\n    \n    print(f\"\ud83d\udcc1 Analyzing replica: {replica_dir}\")\n    \n    # Load and analyze database\n    database = load_database(replica_dir)\n    if not database:\n        return False\n    \n    print_database_analysis(database)\n    print_sync_info(replica_dir)\n    \n    print(f\"\\n\u2705 Analysis complete!\")\n    return True",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_replica.py",
      "tags": [
        "cli",
        "entry-point",
        "file-system",
        "analysis",
        "remarkable-tablet",
        "database",
        "command-line",
        "validation",
        "main-function"
      ],
      "updated_at": "2025-12-07T01:59:09.437397",
      "usage_example": "# Run from command line with default directory:\n# python script.py\n\n# Run from command line with custom directory:\n# python script.py /path/to/replica\n\n# Call programmatically:\nif __name__ == '__main__':\n    success = main()\n    sys.exit(0 if success else 1)"
    },
    {
      "best_practices": [
        "Ensure the replica_dir Path object points to a valid directory before calling this function",
        "The sync_log.json file should follow the expected schema with 'last_sync', 'root_hash', and 'nodes_synced' keys",
        "This function is designed for console output and uses emoji characters - ensure your terminal supports UTF-8 encoding",
        "The function gracefully handles missing files and JSON parsing errors, making it safe to call even if the sync log doesn't exist",
        "The root hash is truncated to 16 characters for display purposes - if you need the full hash, consider modifying the function or reading the file directly",
        "This is a display-only function with no return value - use it for logging/monitoring purposes, not for programmatic access to sync data"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:58:55",
      "decorators": [],
      "dependencies": [
        "json",
        "pathlib"
      ],
      "description": "Reads and displays synchronization log information from a JSON file in a replica directory, including last sync time, root hash, and number of nodes synced.",
      "docstring": "Print sync log information",
      "id": 2131,
      "imports": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "imports_required": [
        "import json",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 257,
      "line_start": 239,
      "name": "print_sync_info",
      "parameters": [
        {
          "annotation": "Path",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "replica_dir"
        }
      ],
      "parameters_explained": {
        "replica_dir": "A Path object pointing to the replica directory that contains the 'sync_log.json' file. This directory should be the root of a replica structure where synchronization logs are stored. The function will look for a file named 'sync_log.json' directly within this directory."
      },
      "parent_class": null,
      "purpose": "This function provides a user-friendly display of synchronization status for a replica directory. It reads a 'sync_log.json' file and prints formatted information about the last synchronization operation, including timestamp, root hash (truncated for readability), and the count of synced nodes. It handles missing files and read errors gracefully with warning messages.",
      "return_annotation": null,
      "return_explained": "This function returns None. It performs side effects by printing formatted synchronization information to stdout using print() statements. Output includes emoji-prefixed status messages showing last sync time, root hash (first 16 characters), and number of nodes synced, or warning messages if the log file is missing or cannot be read.",
      "settings_required": [
        "The replica_dir must contain a 'sync_log.json' file with the expected structure containing 'last_sync', 'root_hash', and 'nodes_synced' keys",
        "The sync_log.json file must be valid JSON and UTF-8 encoded"
      ],
      "source_code": "def print_sync_info(replica_dir: Path):\n    \"\"\"Print sync log information\"\"\"\n    sync_log_file = replica_dir / \"sync_log.json\"\n    \n    if not sync_log_file.exists():\n        print(f\"\u26a0\ufe0f No sync log found\")\n        return\n    \n    try:\n        with open(sync_log_file, 'r', encoding='utf-8') as f:\n            sync_log = json.load(f)\n        \n        print(f\"\\n\ud83d\udccb SYNC INFORMATION:\")\n        print(f\"   \ud83d\udd50 Last sync: {sync_log.get('last_sync', 'unknown')}\")\n        print(f\"   \ud83d\udd11 Root hash: {sync_log.get('root_hash', 'unknown')[:16]}...\")\n        print(f\"   \ud83d\udcca Nodes synced: {sync_log.get('nodes_synced', 0)}\")\n        \n    except Exception as e:\n        print(f\"\u26a0\ufe0f Error reading sync log: {e}\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_replica.py",
      "tags": [
        "logging",
        "synchronization",
        "file-io",
        "json",
        "display",
        "status",
        "replica",
        "monitoring",
        "error-handling"
      ],
      "updated_at": "2025-12-07T01:58:55.347749",
      "usage_example": "from pathlib import Path\nimport json\n\n# Setup: Create a sample replica directory with sync log\nreplica_path = Path('./my_replica')\nreplica_path.mkdir(exist_ok=True)\n\n# Create a sample sync_log.json file\nsync_data = {\n    'last_sync': '2024-01-15 14:30:00',\n    'root_hash': 'abc123def456ghi789jkl012mno345pqr678',\n    'nodes_synced': 42\n}\n\nwith open(replica_path / 'sync_log.json', 'w', encoding='utf-8') as f:\n    json.dump(sync_data, f)\n\n# Use the function\nprint_sync_info(replica_path)\n\n# Output:\n# \ud83d\udccb SYNC INFORMATION:\n#    \ud83d\udd50 Last sync: 2024-01-15 14:30:00\n#    \ud83d\udd11 Root hash: abc123def456ghi7...\n#    \ud83d\udcca Nodes synced: 42"
    },
    {
      "best_practices": [
        "Ensure the database dictionary is properly structured with all expected keys before calling this function to avoid KeyError exceptions",
        "The function depends on three helper functions (analyze_hierarchy, analyze_file_types, print_node_tree) that must be available in scope",
        "This function is designed for console output and debugging; for programmatic access to analysis data, consider using the helper functions directly",
        "The content tree display is limited to the first 3 root nodes with max depth of 3 to prevent overwhelming output; adjust these limits if needed",
        "Consider redirecting stdout if you need to capture the output for logging or further processing"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:58:37",
      "decorators": [],
      "dependencies": [],
      "description": "Prints a comprehensive, formatted analysis of a reMarkable tablet replica database, including statistics, hierarchy information, file types, and a content tree visualization.",
      "docstring": "Print comprehensive database analysis",
      "id": 2130,
      "imports": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "imports_required": [
        "from typing import Dict, Any"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 236,
      "line_start": 171,
      "name": "print_database_analysis",
      "parameters": [
        {
          "annotation": "Dict[str, Any]",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "database"
        }
      ],
      "parameters_explained": {
        "database": "A dictionary containing the complete reMarkable replica database structure. Expected keys include: 'replica_info' (dict with 'created', 'replica_dir', 'statistics' subdicts), 'nodes' (dict mapping UUIDs to node objects), and 'hierarchy' (dict representing parent-child relationships). The 'statistics' subdict should contain counts like 'folders', 'documents', 'notebooks', 'pdfs_extracted', 'notebooks_extracted', and 'total_files'."
      },
      "parent_class": null,
      "purpose": "This function provides a detailed console output report for analyzing the structure and contents of a reMarkable tablet database replica. It displays metadata about the replica creation, node counts, file statistics, hierarchy depth analysis, file type distributions, and a visual tree representation of the content structure. This is useful for debugging, auditing, or understanding the organization of a reMarkable tablet's content after it has been replicated to a local database format.",
      "return_annotation": null,
      "return_explained": "This function returns None. It produces side effects by printing formatted analysis output directly to stdout using print() statements. The output includes emoji-decorated sections for replica info, build statistics, hierarchy analysis, file type analysis, and a content tree visualization.",
      "settings_required": [
        "Requires helper functions: analyze_hierarchy(database), analyze_file_types(database), and print_node_tree(nodes, hierarchy, root_uuid, max_depth) to be defined in the same module or imported",
        "The database parameter must be properly structured with expected keys and nested dictionaries as described in parameters_explained"
      ],
      "source_code": "def print_database_analysis(database: Dict[str, Any]):\n    \"\"\"Print comprehensive database analysis\"\"\"\n    replica_info = database.get('replica_info', {})\n    nodes = database.get('nodes', {})\n    \n    print(\"\ud83d\udd0d REMARKABLE REPLICA DATABASE ANALYSIS\")\n    print(\"=\" * 60)\n    \n    # Basic info\n    print(f\"\ud83d\udcc5 Created: {replica_info.get('created', 'unknown')}\")\n    print(f\"\ud83d\udcc1 Replica directory: {replica_info.get('replica_dir', 'unknown')}\")\n    print(f\"\ud83d\udcca Total nodes: {len(nodes)}\")\n    \n    # Statistics\n    stats = replica_info.get('statistics', {})\n    if stats:\n        print(f\"\\n\ud83d\udcc8 BUILD STATISTICS:\")\n        print(f\"   \ud83d\udcc2 Folders: {stats.get('folders', 0)}\")\n        print(f\"   \ud83d\udcc4 Documents: {stats.get('documents', 0)}\")\n        print(f\"   \ud83d\udcd4 Notebooks: {stats.get('notebooks', 0)}\")\n        print(f\"   \ud83d\udcc4 PDFs extracted: {stats.get('pdfs_extracted', 0)}\")\n        print(f\"   \ud83d\udcdd Notebooks extracted: {stats.get('notebooks_extracted', 0)}\")\n        print(f\"   \ud83d\udcce Total files: {stats.get('total_files', 0)}\")\n    \n    # Hierarchy analysis\n    hierarchy_info = analyze_hierarchy(database)\n    print(f\"\\n\ud83c\udf33 HIERARCHY ANALYSIS:\")\n    print(f\"   \ud83d\udccd Root nodes: {len(hierarchy_info['root_nodes'])}\")\n    print(f\"   \ud83d\udccf Maximum depth: {hierarchy_info['max_depth']}\")\n    print(f\"   \ud83d\udc65 Parent-child relationships: {hierarchy_info['total_parent_relationships']}\")\n    \n    # Node type distribution\n    print(f\"   \ud83d\udcca Node type distribution:\")\n    for node_type, count in hierarchy_info['type_counts'].items():\n        print(f\"     \u2022 {node_type}: {count}\")\n    \n    # Depth distribution\n    print(f\"   \ud83d\udccf Depth distribution:\")\n    for depth in sorted(hierarchy_info['depth_counts'].keys()):\n        count = 
hierarchy_info['depth_counts'][depth]\n        print(f\"     \u2022 Depth {depth}: {count} nodes\")\n    \n    # File type analysis\n    file_stats = analyze_file_types(database)\n    print(f\"\\n\ud83d\udcce FILE TYPE ANALYSIS:\")\n    print(f\"   \ud83d\udcc4 PDF files: {file_stats['pdf_files']}\")\n    print(f\"   \ud83d\udcdd Notebook components: {file_stats['notebook_files']}\")\n    print(f\"   \ud83d\udd8a\ufe0f reMarkable (.rm) files: {file_stats['rm_files']}\")\n    print(f\"   \ud83d\udcc4 Content files: {file_stats['content_files']}\")\n    print(f\"   \ud83d\udcce Total extracted files: {file_stats['total_extracted_files']}\")\n    \n    if file_stats['file_extensions']:\n        print(f\"   \ud83d\udcca File extensions:\")\n        for ext, count in sorted(file_stats['file_extensions'].items(), key=lambda x: x[1], reverse=True):\n            print(f\"     \u2022 {ext}: {count}\")\n    \n    # Tree view of content\n    print(f\"\\n\ud83c\udf33 CONTENT TREE:\")\n    hierarchy = database.get('hierarchy', {})\n    \n    for root_uuid in hierarchy_info['root_nodes'][:3]:  # Show first 3 root nodes\n        print_node_tree(nodes, hierarchy, root_uuid, max_depth=3)\n        print()\n    \n    if len(hierarchy_info['root_nodes']) > 3:\n        print(f\"   ... ({len(hierarchy_info['root_nodes']) - 3} more root nodes)\")",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_replica.py",
      "tags": [
        "remarkable",
        "database-analysis",
        "reporting",
        "visualization",
        "console-output",
        "statistics",
        "hierarchy",
        "file-analysis",
        "tree-view",
        "metadata"
      ],
      "updated_at": "2025-12-07T01:58:37.944224",
      "usage_example": "# Assuming you have a database dict from a reMarkable replica\n# and the required helper functions are available\n\ndatabase = {\n    'replica_info': {\n        'created': '2024-01-15T10:30:00',\n        'replica_dir': '/path/to/replica',\n        'statistics': {\n            'folders': 10,\n            'documents': 25,\n            'notebooks': 15,\n            'pdfs_extracted': 20,\n            'notebooks_extracted': 15,\n            'total_files': 50\n        }\n    },\n    'nodes': {\n        'uuid-1': {'type': 'CollectionType', 'name': 'My Folder'},\n        'uuid-2': {'type': 'DocumentType', 'name': 'My Document'}\n    },\n    'hierarchy': {\n        'uuid-1': ['uuid-2']\n    }\n}\n\n# Print comprehensive analysis\nprint_database_analysis(database)"
    },
    {
      "best_practices": [
        "Ensure the database parameter contains a 'nodes' key with properly structured node data to avoid empty results",
        "File paths in 'extracted_files' should be valid path strings that can be processed by pathlib.Path",
        "The function safely handles missing 'nodes' or 'extracted_files' keys by using .get() with default values",
        "File extension matching is case-insensitive (uses .lower())",
        "Files without extensions are tracked under the '[no extension]' key in file_extensions dictionary",
        "The function does not validate if file paths actually exist on the filesystem, it only analyzes the paths stored in the database"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:58:20",
      "decorators": [],
      "dependencies": [],
      "description": "Analyzes file types within a replica database structure, counting different file categories and tracking file extensions.",
      "docstring": "Analyze file types in the replica",
      "id": 2129,
      "imports": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from typing import Dict, Any"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 168,
      "line_start": 132,
      "name": "analyze_file_types",
      "parameters": [
        {
          "annotation": "Dict[str, Any]",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "database"
        }
      ],
      "parameters_explained": {
        "database": "A dictionary containing replica data with a 'nodes' key. Each node should have a 'uuid' as key and contain an 'extracted_files' list with file paths. Expected structure: {'nodes': {uuid: {'extracted_files': [file_paths]}}}"
      },
      "parent_class": null,
      "purpose": "This function processes a database dictionary containing nodes with extracted files, categorizing them by type (PDF, notebook, RM, content files) and counting occurrences of each file extension. It's designed for analyzing file distribution in a replica system, likely for a note-taking or document management application (possibly reMarkable tablet data).",
      "return_annotation": "Dict[str, Any]",
      "return_explained": "Returns a dictionary with file statistics containing: 'pdf_files' (int: count of PDF files), 'notebook_files' (int: files in notebook directories), 'rm_files' (int: .rm format files), 'content_files' (int: files named 'content'), 'total_extracted_files' (int: total file count), and 'file_extensions' (dict: mapping of extensions to their counts, with '[no extension]' for files without extensions)",
      "settings_required": [],
      "source_code": "def analyze_file_types(database: Dict[str, Any]) -> Dict[str, Any]:\n    \"\"\"Analyze file types in the replica\"\"\"\n    nodes = database.get('nodes', {})\n    \n    file_stats = {\n        'pdf_files': 0,\n        'notebook_files': 0,\n        'rm_files': 0,\n        'content_files': 0,\n        'total_extracted_files': 0,\n        'file_extensions': {}\n    }\n    \n    for uuid, node in nodes.items():\n        extracted_files = node.get('extracted_files', [])\n        file_stats['total_extracted_files'] += len(extracted_files)\n        \n        for file_path in extracted_files:\n            file_path_obj = Path(file_path)\n            ext = file_path_obj.suffix.lower()\n            \n            if ext == '.pdf':\n                file_stats['pdf_files'] += 1\n            elif ext == '.rm':\n                file_stats['rm_files'] += 1\n            elif file_path_obj.name == 'content':\n                file_stats['content_files'] += 1\n            elif '_notebook' in str(file_path_obj.parent):\n                file_stats['notebook_files'] += 1\n            \n            # Count extensions\n            if ext:\n                file_stats['file_extensions'][ext] = file_stats['file_extensions'].get(ext, 0) + 1\n            else:\n                file_stats['file_extensions']['[no extension]'] = file_stats['file_extensions'].get('[no extension]', 0) + 1\n    \n    return file_stats",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_replica.py",
      "tags": [
        "file-analysis",
        "data-processing",
        "statistics",
        "file-types",
        "replica",
        "document-management",
        "remarkable",
        "file-counting",
        "extension-analysis"
      ],
      "updated_at": "2025-12-07T01:58:20.121775",
      "usage_example": "from pathlib import Path\nfrom typing import Dict, Any\n\ndef analyze_file_types(database: Dict[str, Any]) -> Dict[str, Any]:\n    # ... function code ...\n    pass\n\n# Example usage\ndatabase = {\n    'nodes': {\n        'uuid-1': {\n            'extracted_files': [\n                '/path/to/document.pdf',\n                '/path/to/notes_notebook/page1.rm',\n                '/path/to/content'\n            ]\n        },\n        'uuid-2': {\n            'extracted_files': [\n                '/path/to/another.pdf',\n                '/path/to/file.txt'\n            ]\n        }\n    }\n}\n\nresults = analyze_file_types(database)\nprint(f\"Total files: {results['total_extracted_files']}\")\nprint(f\"PDF files: {results['pdf_files']}\")\nprint(f\"Extensions: {results['file_extensions']}\")"
    },
    {
      "best_practices": [
        "Ensure the 'uuid' parameter corresponds to a valid key in the 'nodes' dictionary to avoid returning early without output",
        "Set an appropriate 'max_depth' value to prevent excessive output or stack overflow with deeply nested hierarchies",
        "The 'last_modified' field in nodes should be a timestamp in milliseconds (Unix epoch * 1000) for proper date formatting",
        "The function silently handles missing nodes and invalid timestamps using try-except blocks, so validate your data beforehand for debugging",
        "Children are sorted alphabetically by name for consistent output; ensure node names are present to avoid sorting issues",
        "The function uses Unicode characters (\ud83d\udcc1, \ud83d\udcc4, \ud83d\udcdd, \u2502) which may not display correctly in all terminal environments",
        "This function has side effects (prints to stdout); consider redirecting output or capturing it if you need to process the tree structure programmatically"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:58:03",
      "decorators": [],
      "dependencies": [
        "datetime",
        "typing"
      ],
      "description": "Recursively prints a hierarchical tree visualization of nodes with icons, names, file counts, and modification dates to the console.",
      "docstring": "Print a tree view of nodes",
      "id": 2128,
      "imports": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "imports_required": [
        "from datetime import datetime",
        "from typing import Dict, Any, List"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 129,
      "line_start": 82,
      "name": "print_node_tree",
      "parameters": [
        {
          "annotation": "Dict[str, Any]",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "nodes"
        },
        {
          "annotation": "Dict[str, List[str]]",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "hierarchy"
        },
        {
          "annotation": "str",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "uuid"
        },
        {
          "annotation": "int",
          "default": "0",
          "kind": "positional_or_keyword",
          "name": "depth"
        },
        {
          "annotation": "int",
          "default": "3",
          "kind": "positional_or_keyword",
          "name": "max_depth"
        },
        {
          "annotation": "str",
          "default": "''",
          "kind": "positional_or_keyword",
          "name": "prefix"
        }
      ],
      "parameters_explained": {
        "depth": "Integer representing the current depth level in the tree traversal. Starts at 0 for the root node and increments with each recursive call. Used to control indentation and enforce max_depth limits.",
        "hierarchy": "Dictionary mapping parent node UUIDs (strings) to lists of child node UUIDs (strings). Defines the parent-child relationships between nodes. Keys are parent UUIDs, values are lists of their children's UUIDs.",
        "max_depth": "Integer specifying the maximum depth to traverse in the tree. Prevents infinite recursion and limits output size. Default is 3 levels deep.",
        "nodes": "Dictionary mapping node UUIDs (strings) to node data dictionaries. Each node dictionary should contain keys like 'name', 'node_type', 'extracted_files' (list), and 'last_modified' (timestamp in milliseconds). This is the complete collection of all nodes in the hierarchy.",
        "prefix": "String used for indentation and tree structure visualization. Contains characters like spaces, '\u2502', and '\u2514' to create the tree lines. Builds up with each recursive level.",
        "uuid": "String identifier of the current node to print. This is the starting point for the tree traversal and should be a valid key in the 'nodes' dictionary."
      },
      "parent_class": null,
      "purpose": "This function provides a visual representation of a node hierarchy (like a file system or document structure) by recursively traversing parent-child relationships and displaying them in a tree format with appropriate indentation, icons based on node type, and metadata such as file counts and last modified dates. It's useful for debugging, visualizing data structures, or providing user-friendly output of hierarchical data.",
      "return_annotation": null,
      "return_explained": "This function returns None (no explicit return value). It produces side effects by printing the tree structure directly to stdout using the print() function.",
      "settings_required": [],
      "source_code": "def print_node_tree(nodes: Dict[str, Any], hierarchy: Dict[str, List[str]], \n                   uuid: str, depth: int = 0, max_depth: int = 3, prefix: str = \"\"):\n    \"\"\"Print a tree view of nodes\"\"\"\n    if depth > max_depth:\n        return\n    \n    node = nodes.get(uuid)\n    if not node:\n        return\n    \n    # Format node info\n    name = node.get('name', 'unnamed')\n    node_type = node.get('node_type', 'unknown')\n    extracted_files = node.get('extracted_files', [])\n    \n    # Icon based on type\n    if node_type == 'folder':\n        icon = \"\ud83d\udcc1\"\n    elif extracted_files and any(f.endswith('.pdf') for f in extracted_files):\n        icon = \"\ud83d\udcc4\"\n    elif extracted_files:\n        icon = \"\ud83d\udcdd\"\n    else:\n        icon = \"\ud83d\udcc4\"\n    \n    # Additional info\n    info_parts = []\n    if extracted_files:\n        info_parts.append(f\"{len(extracted_files)} files\")\n    \n    if node.get('last_modified'):\n        try:\n            mod_time = datetime.fromtimestamp(int(node['last_modified']) / 1000)\n            info_parts.append(f\"modified {mod_time.strftime('%Y-%m-%d')}\")\n        except:\n            pass\n    \n    info_str = f\" ({', '.join(info_parts)})\" if info_parts else \"\"\n    \n    print(f\"{prefix}{icon} {name}{info_str}\")\n    \n    # Show children\n    children = hierarchy.get(uuid, [])\n    if children and depth < max_depth:\n        for i, child_uuid in enumerate(sorted(children, key=lambda x: nodes.get(x, {}).get('name', ''))):\n            is_last = i == len(children) - 1\n            child_prefix = prefix + (\"    \" if is_last else \"\u2502   \")\n            print_node_tree(nodes, hierarchy, child_uuid, depth + 1, max_depth, child_prefix)",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_replica.py",
      "tags": [
        "tree-visualization",
        "hierarchical-data",
        "console-output",
        "recursive",
        "file-system",
        "pretty-print",
        "node-traversal",
        "data-visualization",
        "formatting"
      ],
      "updated_at": "2025-12-07T01:58:03.636917",
      "usage_example": "from datetime import datetime\nfrom typing import Dict, Any, List\n\ndef print_node_tree(nodes: Dict[str, Any], hierarchy: Dict[str, List[str]], \n                   uuid: str, depth: int = 0, max_depth: int = 3, prefix: str = \"\"):\n    # ... function code ...\n    pass\n\n# Example data structure\nnodes = {\n    'root-123': {\n        'name': 'Documents',\n        'node_type': 'folder',\n        'extracted_files': [],\n        'last_modified': 1704067200000\n    },\n    'child-456': {\n        'name': 'Report.pdf',\n        'node_type': 'file',\n        'extracted_files': ['report.pdf'],\n        'last_modified': 1704153600000\n    },\n    'child-789': {\n        'name': 'Notes.txt',\n        'node_type': 'file',\n        'extracted_files': ['notes.txt'],\n        'last_modified': 1704240000000\n    }\n}\n\nhierarchy = {\n    'root-123': ['child-456', 'child-789']\n}\n\n# Print the tree starting from root\nprint_node_tree(nodes, hierarchy, 'root-123', max_depth=2)"
    },
    {
      "best_practices": [
        "This function uses binary units (1024-based) rather than decimal units (1000-based). Be aware of the difference between KB (kibibyte, 1024 bytes) and kB (kilobyte, 1000 bytes) when displaying to users.",
        "The function assumes non-negative input values. Consider adding input validation if negative values might be passed.",
        "For very large files (terabytes or larger), the function will display them in GB, which may result in large numbers. Consider extending the function to support TB and PB units if needed.",
        "The function returns one decimal place for all units except bytes. This provides a good balance between precision and readability for most use cases.",
        "Consider caching the result if the same file size needs to be formatted multiple times to avoid redundant calculations."
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:57:42",
      "decorators": [],
      "dependencies": [],
      "description": "Converts a file size in bytes to a human-readable string format with appropriate units (B, KB, MB, or GB).",
      "docstring": "Format file size in human readable format",
      "id": 2127,
      "imports": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "imports_required": [],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 79,
      "line_start": 70,
      "name": "format_file_size",
      "parameters": [
        {
          "annotation": "int",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "size_bytes"
        }
      ],
      "parameters_explained": {
        "size_bytes": "An integer representing the file size in bytes. Expected to be a non-negative integer value. The function will convert this value to the most appropriate unit (B, KB, MB, or GB) based on its magnitude. Values less than 1024 are displayed in bytes, 1024-1048575 in KB, 1048576-1073741823 in MB, and 1073741824 or greater in GB."
      },
      "parent_class": null,
      "purpose": "This function takes a file size expressed in bytes and formats it into a more readable representation by automatically selecting the most appropriate unit (bytes, kilobytes, megabytes, or gigabytes). It uses binary units (1024-based) and formats decimal values to one decimal place for units larger than bytes. This is commonly used in file managers, download progress indicators, storage displays, and any application that needs to present file sizes to users in an intuitive format.",
      "return_annotation": "str",
      "return_explained": "Returns a string containing the formatted file size with one decimal place precision (for KB, MB, GB) and the appropriate unit suffix. For sizes less than 1024 bytes, returns an integer value with 'B' suffix. Examples: '512 B', '1.5 KB', '2.3 MB', '1.2 GB'. The format uses a space between the numeric value and the unit.",
      "settings_required": [],
      "source_code": "def format_file_size(size_bytes: int) -> str:\n    \"\"\"Format file size in human readable format\"\"\"\n    if size_bytes < 1024:\n        return f\"{size_bytes} B\"\n    elif size_bytes < 1024 * 1024:\n        return f\"{size_bytes / 1024:.1f} KB\"\n    elif size_bytes < 1024 * 1024 * 1024:\n        return f\"{size_bytes / (1024 * 1024):.1f} MB\"\n    else:\n        return f\"{size_bytes / (1024 * 1024 * 1024):.1f} GB\"",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_replica.py",
      "tags": [
        "file-size",
        "formatting",
        "human-readable",
        "bytes-conversion",
        "utility",
        "display",
        "storage",
        "file-management",
        "string-formatting",
        "unit-conversion"
      ],
      "updated_at": "2025-12-07T01:57:42.275134",
      "usage_example": "# Basic usage examples\nsize1 = format_file_size(500)\nprint(size1)  # Output: '500 B'\n\nsize2 = format_file_size(2048)\nprint(size2)  # Output: '2.0 KB'\n\nsize3 = format_file_size(5242880)\nprint(size3)  # Output: '5.0 MB'\n\nsize4 = format_file_size(1073741824)\nprint(size4)  # Output: '1.0 GB'\n\n# Practical example with file operations\nimport os\nfile_path = 'example.txt'\nif os.path.exists(file_path):\n    file_size = os.path.getsize(file_path)\n    readable_size = format_file_size(file_size)\n    print(f'File size: {readable_size}')"
    },
    {
      "best_practices": [
        "This function uses 1024 as the conversion factor (binary units), which is standard for file systems. Be aware that some contexts use 1000 (decimal units) instead.",
        "The function does not validate that size_bytes is non-negative. Consider adding validation if negative values should be rejected.",
        "The function stops at TB (terabytes). For extremely large values (petabytes and beyond), it will still display in TB units.",
        "The output is always formatted to one decimal place, which provides a good balance between precision and readability for most use cases.",
        "When size_bytes is exactly 0, the function returns '0.0 B', which is correct and expected behavior.",
        "This function is purely for display purposes and should not be used for calculations or comparisons of file sizes."
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:23:01",
      "decorators": [],
      "dependencies": [],
      "description": "Converts a file size in bytes to a human-readable string format with appropriate units (B, KB, MB, GB, TB).",
      "docstring": "Convert bytes to human readable format",
      "id": 241,
      "imports": [
        "import os",
        "import sys",
        "from datetime import datetime",
        "from sharepoint_graph_client import SharePointGraphClient",
        "from filecloud_client import FileCloudClient",
        "from config import Config"
      ],
      "imports_required": [],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 28,
      "line_start": 19,
      "name": "format_file_size_v1",
      "parameters": [
        {
          "annotation": null,
          "default": null,
          "kind": "positional_or_keyword",
          "name": "size_bytes"
        }
      ],
      "parameters_explained": {
        "size_bytes": "The file size in bytes as a numeric value (int or float). Can be None, in which case the function returns 'Unknown'. Expected to be non-negative for meaningful results, though negative values will be processed (resulting in negative formatted output)."
      },
      "parent_class": null,
      "purpose": "This utility function formats raw byte values into user-friendly file size representations by automatically selecting the most appropriate unit (bytes, kilobytes, megabytes, gigabytes, or terabytes) and formatting the output to one decimal place. It handles None values gracefully by returning 'Unknown'. Commonly used in file management systems, storage displays, and data transfer interfaces where file sizes need to be presented to end users.",
      "return_annotation": null,
      "return_explained": "Returns a string representing the formatted file size. Format is '{value:.1f} {unit}' where value is rounded to one decimal place and unit is one of 'B', 'KB', 'MB', 'GB', or 'TB'. Returns 'Unknown' if size_bytes is None. Examples: '1.5 KB', '250.0 MB', '1.2 GB', 'Unknown'.",
      "settings_required": [],
      "source_code": "def format_file_size(size_bytes):\n    \"\"\"Convert bytes to human readable format\"\"\"\n    if size_bytes is None:\n        return \"Unknown\"\n    \n    for unit in ['B', 'KB', 'MB', 'GB']:\n        if size_bytes < 1024.0:\n            return f\"{size_bytes:.1f} {unit}\"\n        size_bytes /= 1024.0\n    return f\"{size_bytes:.1f} TB\"",
      "source_file": "/tf/active/vicechatdev/SPFCsync/dry_run_test.py",
      "tags": [
        "file-size",
        "formatting",
        "utility",
        "human-readable",
        "bytes-conversion",
        "storage",
        "data-display",
        "string-formatting"
      ],
      "updated_at": "2025-12-07T01:57:42.272802",
      "usage_example": "# Basic usage examples\nsize1 = format_file_size(1024)\nprint(size1)  # Output: '1.0 KB'\n\nsize2 = format_file_size(1536000)\nprint(size2)  # Output: '1.5 MB'\n\nsize3 = format_file_size(5368709120)\nprint(size3)  # Output: '5.0 GB'\n\nsize4 = format_file_size(None)\nprint(size4)  # Output: 'Unknown'\n\nsize5 = format_file_size(500)\nprint(size5)  # Output: '500.0 B'\n\n# Use in file listing context\nimport os\nfile_path = 'example.txt'\nif os.path.exists(file_path):\n    file_size = os.path.getsize(file_path)\n    readable_size = format_file_size(file_size)\n    print(f'{file_path}: {readable_size}')"
    },
    {
      "best_practices": [
        "Ensure the database dictionary contains both 'nodes' and 'hierarchy' keys for complete analysis",
        "Node dictionaries should include 'parent_uuid', 'depth', and 'node_type' fields for accurate statistics",
        "The function handles missing keys gracefully using .get() with defaults, but providing complete data yields better results",
        "Root nodes are identified by the absence of 'parent_uuid' or a falsy value for that field",
        "The function assumes depth values are pre-calculated in the node data; it does not compute depth from hierarchy",
        "For large hierarchies, consider the memory implications of storing all statistics in the returned dictionary"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:57:24",
      "decorators": [],
      "dependencies": [],
      "description": "Analyzes a hierarchical database structure to extract statistics about nodes, their relationships, depths, and types.",
      "docstring": "Analyze the hierarchy structure",
      "id": 2126,
      "imports": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "imports_required": [
        "from typing import Dict, Any"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 67,
      "line_start": 35,
      "name": "analyze_hierarchy",
      "parameters": [
        {
          "annotation": "Dict[str, Any]",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "database"
        }
      ],
      "parameters_explained": {
        "database": "A dictionary containing the hierarchical database structure. Expected to have two keys: 'nodes' (dict mapping UUIDs to node dictionaries with properties like 'parent_uuid', 'depth', and 'node_type') and 'hierarchy' (dict representing parent-child relationships). The 'nodes' dict should contain node objects with optional fields: 'parent_uuid' (string or None), 'depth' (integer), and 'node_type' (string)."
      },
      "parent_class": null,
      "purpose": "This function processes a database dictionary containing nodes and hierarchy information to provide comprehensive statistics including root nodes identification, depth distribution analysis, node type categorization, and parent-child relationship counts. It's useful for understanding the structure and composition of hierarchical data systems, tree structures, or organizational charts stored in dictionary format.",
      "return_annotation": "Dict[str, Any]",
      "return_explained": "Returns a dictionary with five keys: 'root_nodes' (list of UUIDs for nodes without parents), 'depth_counts' (dict mapping depth levels to count of nodes at that depth), 'max_depth' (integer representing the maximum depth in the hierarchy), 'type_counts' (dict mapping node types to their counts), and 'total_parent_relationships' (integer count of entries in the hierarchy dict).",
      "settings_required": [],
      "source_code": "def analyze_hierarchy(database: Dict[str, Any]) -> Dict[str, Any]:\n    \"\"\"Analyze the hierarchy structure\"\"\"\n    nodes = database.get('nodes', {})\n    hierarchy = database.get('hierarchy', {})\n    \n    # Find root nodes (nodes with no parent)\n    root_nodes = []\n    for uuid, node in nodes.items():\n        if not node.get('parent_uuid'):\n            root_nodes.append(uuid)\n    \n    # Calculate depth statistics\n    depth_counts = {}\n    max_depth = 0\n    \n    for uuid, node in nodes.items():\n        depth = node.get('depth', 0)\n        depth_counts[depth] = depth_counts.get(depth, 0) + 1\n        max_depth = max(max_depth, depth)\n    \n    # Count node types\n    type_counts = {}\n    for uuid, node in nodes.items():\n        node_type = node.get('node_type', 'unknown')\n        type_counts[node_type] = type_counts.get(node_type, 0) + 1\n    \n    return {\n        'root_nodes': root_nodes,\n        'depth_counts': depth_counts,\n        'max_depth': max_depth,\n        'type_counts': type_counts,\n        'total_parent_relationships': len(hierarchy)\n    }",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_replica.py",
      "tags": [
        "hierarchy",
        "tree-structure",
        "data-analysis",
        "statistics",
        "graph-analysis",
        "node-analysis",
        "depth-calculation",
        "parent-child-relationships",
        "data-processing",
        "structural-analysis"
      ],
      "updated_at": "2025-12-07T01:57:24.658174",
      "usage_example": "from typing import Dict, Any\n\ndef analyze_hierarchy(database: Dict[str, Any]) -> Dict[str, Any]:\n    nodes = database.get('nodes', {})\n    hierarchy = database.get('hierarchy', {})\n    root_nodes = []\n    for uuid, node in nodes.items():\n        if not node.get('parent_uuid'):\n            root_nodes.append(uuid)\n    depth_counts = {}\n    max_depth = 0\n    for uuid, node in nodes.items():\n        depth = node.get('depth', 0)\n        depth_counts[depth] = depth_counts.get(depth, 0) + 1\n        max_depth = max(max_depth, depth)\n    type_counts = {}\n    for uuid, node in nodes.items():\n        node_type = node.get('node_type', 'unknown')\n        type_counts[node_type] = type_counts.get(node_type, 0) + 1\n    return {\n        'root_nodes': root_nodes,\n        'depth_counts': depth_counts,\n        'max_depth': max_depth,\n        'type_counts': type_counts,\n        'total_parent_relationships': len(hierarchy)\n    }\n\n# Example usage\ndatabase = {\n    'nodes': {\n        'uuid1': {'parent_uuid': None, 'depth': 0, 'node_type': 'root'},\n        'uuid2': {'parent_uuid': 'uuid1', 'depth': 1, 'node_type': 'branch'},\n        'uuid3': {'parent_uuid': 'uuid1', 'depth': 1, 'node_type': 'branch'},\n        'uuid4': {'parent_uuid': 'uuid2', 'depth': 2, 'node_type': 'leaf'}\n    },\n    'hierarchy': {\n        'uuid1': ['uuid2', 'uuid3'],\n        'uuid2': ['uuid4']\n    }\n}\n\nresult = analyze_hierarchy(database)\nprint(result)\n# Output: {'root_nodes': ['uuid1'], 'depth_counts': {0: 1, 1: 2, 2: 1}, 'max_depth': 2, 'type_counts': {'root': 1, 'branch': 2, 'leaf': 1}, 'total_parent_relationships': 2}"
    },
    {
      "best_practices": [
        "Always check if the returned value is None before using it to avoid AttributeError",
        "Ensure the replica_dir parameter is a valid Path object, not a string",
        "The function expects a file named exactly 'replica_database.json' in the provided directory",
        "Error messages are printed to stdout; consider logging for production use",
        "The function uses UTF-8 encoding by default; ensure your JSON files are UTF-8 encoded",
        "Broad exception catching may hide specific errors; consider more granular error handling for production",
        "The function returns None on both file-not-found and parsing errors; check error messages to distinguish between failure types"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:57:05",
      "decorators": [],
      "dependencies": [
        "json",
        "pathlib"
      ],
      "description": "Loads a JSON database file from a replica directory and returns its contents as a dictionary, with error handling for missing files or parsing failures.",
      "docstring": "Load the replica database",
      "id": 2125,
      "imports": [
        "import json",
        "import sys",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import List"
      ],
      "imports_required": [
        "import json",
        "from pathlib import Path",
        "from typing import Dict, Any"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 32,
      "line_start": 19,
      "name": "load_database",
      "parameters": [
        {
          "annotation": "Path",
          "default": null,
          "kind": "positional_or_keyword",
          "name": "replica_dir"
        }
      ],
      "parameters_explained": {
        "replica_dir": "A Path object representing the directory containing the replica database. This directory should contain a file named 'replica_database.json'. The Path object should be from pathlib.Path and point to a valid directory location."
      },
      "parent_class": null,
      "purpose": "This function is designed to safely load a replica database stored as a JSON file. It checks for file existence, handles encoding properly (UTF-8), and provides user-friendly error messages. Returns None if the file doesn't exist or if loading fails, making it suitable for applications that need to work with replica/backup database files.",
      "return_annotation": "Dict[str, Any]",
      "return_explained": "Returns a dictionary (Dict[str, Any]) containing the parsed JSON data from the replica database file. Returns None if the database file doesn't exist or if any exception occurs during loading (e.g., invalid JSON, permission errors, encoding issues). The dictionary structure depends on the content of the JSON file.",
      "settings_required": [
        "A valid replica directory path containing a 'replica_database.json' file",
        "Read permissions for the replica_database.json file",
        "Valid UTF-8 encoded JSON file"
      ],
      "source_code": "def load_database(replica_dir: Path) -> Dict[str, Any]:\n    \"\"\"Load the replica database\"\"\"\n    database_file = replica_dir / \"replica_database.json\"\n    \n    if not database_file.exists():\n        print(f\"\u274c Database file not found: {database_file}\")\n        return None\n    \n    try:\n        with open(database_file, 'r', encoding='utf-8') as f:\n            return json.load(f)\n    except Exception as e:\n        print(f\"\u274c Failed to load database: {e}\")\n        return None",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/analyze_replica.py",
      "tags": [
        "database",
        "json",
        "file-loading",
        "error-handling",
        "replica",
        "data-persistence",
        "file-io",
        "pathlib"
      ],
      "updated_at": "2025-12-07T01:57:05.204234",
      "usage_example": "from pathlib import Path\nimport json\nfrom typing import Dict, Any\n\ndef load_database(replica_dir: Path) -> Dict[str, Any]:\n    \"\"\"Load the replica database\"\"\"\n    database_file = replica_dir / \"replica_database.json\"\n    \n    if not database_file.exists():\n        print(f\"\u274c Database file not found: {database_file}\")\n        return None\n    \n    try:\n        with open(database_file, 'r', encoding='utf-8') as f:\n            return json.load(f)\n    except Exception as e:\n        print(f\"\u274c Failed to load database: {e}\")\n        return None\n\n# Usage\nreplica_path = Path('/path/to/replica')\ndb_data = load_database(replica_path)\n\nif db_data is not None:\n    print(f\"Database loaded successfully with {len(db_data)} entries\")\n    # Process database data\nelse:\n    print(\"Failed to load database\")"
    },
    {
      "best_practices": [
        "Review the console output carefully to understand which fixes were applied",
        "Run the generated 'fixed_upload_test.py' to verify the fixes before using in production",
        "Ensure the ImplementationFixer class is properly implemented with all required fix methods",
        "Check that you have write permissions in the directory where this function is executed",
        "Consider running this in a test environment first before applying to production code",
        "The function prints next steps - follow them sequentially for best results",
        "Manually verify JWT device description changes as noted in the output"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:56:14",
      "decorators": [],
      "dependencies": [
        "json",
        "os",
        "time",
        "pathlib",
        "typing"
      ],
      "description": "Orchestrates the application of multiple critical fixes to align test code with real application behavior, including user agent, metadata, page data, JWT, and field corrections.",
      "docstring": "Apply all critical fixes to match real app behavior",
      "id": 2122,
      "imports": [
        "import json",
        "import os",
        "import time",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import Any"
      ],
      "imports_required": [
        "import json",
        "import os",
        "import time",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import Any"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 393,
      "line_start": 368,
      "name": "main_v46",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for an implementation fixing tool that applies a series of corrections to test code to match the behavior of a real application. It instantiates an ImplementationFixer object, applies six different types of fixes (user agent, metadata source, page data content, last opened field, JWT device description, and creates a fixed upload test), generates a summary of applied fixes, and provides next steps for verification. This is typically used in development/testing scenarios where test code needs to be synchronized with production application behavior.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value indicating whether any fixes were successfully applied. Returns True if at least one fix was applied (len(fixer.fixes_applied) > 0), False otherwise. This allows calling code to determine if the fixing process made any changes.",
      "settings_required": [
        "Requires ImplementationFixer class to be defined in the same module or imported",
        "Requires write permissions to create 'fixed_upload_test.py' file in the current directory",
        "May require specific file paths or configuration files that ImplementationFixer depends on"
      ],
      "source_code": "def main():\n    \"\"\"Apply all critical fixes to match real app behavior\"\"\"\n    print(\"\ud83d\udd27 IMPLEMENTATION FIXER\")\n    print(\"=\" * 50)\n    print(\"Applying fixes identified by dry run analysis...\")\n    \n    fixer = ImplementationFixer()\n    \n    # Apply all fixes\n    fixer.fix_user_agent()\n    fixer.fix_metadata_source()\n    fixer.fix_pagedata_content()\n    fixer.fix_last_opened_field()\n    fixer.fix_jwt_device_description()\n    fixer.create_fixed_upload_test()\n    \n    # Generate summary\n    fixer.generate_fix_summary()\n    \n    print(\"\\n\ud83c\udfaf NEXT STEPS:\")\n    print(\"1. Review the fixes applied above\")\n    print(\"2. Run 'python3 fixed_upload_test.py' to verify fixed structure\")\n    print(\"3. Test with actual upload once satisfied with the fixes\")\n    print(\"4. Manually update JWT device description in auth process\")\n    \n    return len(fixer.fixes_applied) > 0",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/implementation_fixer.py",
      "tags": [
        "testing",
        "fixing",
        "automation",
        "code-generation",
        "test-synchronization",
        "implementation-fixer",
        "orchestration",
        "main-entry-point",
        "user-agent",
        "metadata",
        "jwt",
        "upload-test"
      ],
      "updated_at": "2025-12-07T01:56:14.726443",
      "usage_example": "if __name__ == '__main__':\n    success = main()\n    if success:\n        print('Fixes applied successfully')\n    else:\n        print('No fixes were applied')\n    exit(0 if success else 1)"
    },
    {
      "best_practices": [
        "This function should be called as the entry point when running the script standalone",
        "The function handles KeyboardInterrupt gracefully, allowing users to cancel the sync operation",
        "Returns proper exit codes (0 for success, 1 for failure) suitable for shell scripting",
        "Provides visual feedback with emoji indicators for better user experience",
        "Catches all exceptions to prevent unhandled crashes and provides error messages",
        "Displays a summary of synchronized content after successful completion",
        "Should be wrapped in if __name__ == '__main__': block for proper module behavior"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:55:28",
      "decorators": [],
      "dependencies": [
        "requests"
      ],
      "description": "Entry point function that orchestrates a complete synchronization of a reMarkable tablet's content, displaying progress and summary statistics.",
      "docstring": "Main function for standalone execution",
      "id": 2120,
      "imports": [
        "import os",
        "import sys",
        "import json",
        "import time",
        "import hashlib",
        "import requests",
        "import logging",
        "import re",
        "import shutil",
        "import subprocess",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import List",
        "from typing import Set",
        "from dataclasses import dataclass",
        "import re",
        "import shutil"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict, Any, Optional, List, Set",
        "from dataclasses import dataclass",
        "import os",
        "import sys",
        "import json",
        "import time",
        "import hashlib",
        "import requests",
        "import logging",
        "import re",
        "import shutil",
        "import subprocess"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 860,
      "line_start": 826,
      "name": "main_v65",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for a standalone reMarkable tablet synchronization tool. It initializes a RemarkableReplicaSync instance, performs a complete replica sync, handles errors gracefully, and displays a formatted summary of the synchronized library including folder and document counts. It's designed for command-line execution with user-friendly console output including emoji indicators for status.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 for successful completion, 1 for any failure (sync failure, keyboard interrupt, or exception). This follows standard Unix convention for process exit codes.",
      "settings_required": [
        "RemarkableReplicaSync class must be defined and available in the same module or imported",
        "RemarkableReplicaSync must have methods: sync_complete_replica(), get_folders(), get_root_documents(), get_documents_in_folder(uuid)",
        "Appropriate reMarkable API credentials or authentication configured for RemarkableReplicaSync",
        "Network connectivity to reMarkable cloud services"
      ],
      "source_code": "def main():\n    \"\"\"Main function for standalone execution\"\"\"\n    print(\"\ud83d\udd04 reMarkable Replica Sync - Standalone Tool\")\n    print(\"=\" * 50)\n    \n    try:\n        sync = RemarkableReplicaSync()\n        success = sync.sync_complete_replica()\n        \n        if success:\n            print(\"\\n\u2705 Sync completed successfully!\")\n            \n            # Show summary\n            folders = sync.get_folders()\n            root_docs = sync.get_root_documents()\n            \n            print(f\"\\n\ud83d\udcca Current Library:\")\n            print(f\"   \ud83d\udcc2 Folders: {len(folders)}\")\n            print(f\"   \ud83d\udcc4 Root Documents: {len(root_docs)}\")\n            \n            for folder in folders:\n                folder_docs = sync.get_documents_in_folder(folder['uuid'])\n                print(f\"   \ud83d\udcc2 {folder['name']}: {len(folder_docs)} documents\")\n        else:\n            print(\"\\n\u274c Sync failed!\")\n            return 1\n            \n    except KeyboardInterrupt:\n        print(\"\\n\u26a0\ufe0f Sync interrupted by user\")\n        return 1\n    except Exception as e:\n        print(f\"\\n\u274c Sync error: {e}\")\n        return 1\n    \n    return 0",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/sync_replica.py",
      "tags": [
        "entry-point",
        "synchronization",
        "remarkable-tablet",
        "cli-tool",
        "error-handling",
        "standalone",
        "main-function",
        "document-management",
        "cloud-sync"
      ],
      "updated_at": "2025-12-07T01:56:14.689217",
      "usage_example": "if __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "Always run the dry-run phase first to preview changes before applying them - this is built into the function flow",
        "Ensure proper authentication is configured before calling this function",
        "The function requires user interaction via stdin, so it should only be called in interactive CLI contexts, not in automated scripts",
        "Handle the boolean return value to determine if the repair was successful for proper exit codes or logging",
        "Ensure network connectivity to reMarkable cloud services before running",
        "Consider backing up the original root.docSchema before running this repair tool",
        "The function catches broad exceptions during initialization but not during the repair process itself - ensure RootDocSchemaRepair.run_repair() has proper error handling"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "Required by RootDocSchemaRepair class for parsing and manipulating docSchema data",
          "import": "import json",
          "optional": false
        },
        {
          "condition": "Required by RootDocSchemaRepair class for timing operations or delays",
          "import": "import time",
          "optional": false
        },
        {
          "condition": "Required by RootDocSchemaRepair class for calculating file hashes",
          "import": "import hashlib",
          "optional": false
        },
        {
          "condition": "Required by RootDocSchemaRepair class for file path operations",
          "import": "from pathlib import Path",
          "optional": false
        },
        {
          "condition": "Required by RootDocSchemaRepair class for type annotations",
          "import": "from typing import Dict, List, Tuple, Any",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:51:13",
      "decorators": [],
      "dependencies": [
        "requests"
      ],
      "description": "Entry point function that orchestrates a repair process for a corrupted reMarkable root.docSchema file by running a dry-run analysis first, then optionally applying the repair based on user confirmation.",
      "docstring": "Run the root.docSchema repair",
      "id": 2109,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Tuple",
        "from typing import Any",
        "import requests",
        "from auth import RemarkableAuth"
      ],
      "imports_required": [
        "from auth import RemarkableAuth"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 438,
      "line_start": 405,
      "name": "main_v64",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main CLI interface for the RootDocSchemaRepair tool. It guides users through a two-phase repair process: first performing a dry-run to preview changes (preserving folders and invoice PDFs, recalculating document sizes), then prompting for confirmation before applying the actual repair. It handles initialization errors and provides user-friendly console output with emoji indicators for status updates.",
      "return_annotation": null,
      "return_explained": "When the user confirms the repair, returns the result of repair_tool.run_repair(dry_run=False), which is expected to be a boolean indicating whether the repair succeeded. Returns False if the dry run failed, the user cancelled the operation, or any exception was raised anywhere in the function (initialization, dry run, or repair).",
      "settings_required": [
        "RootDocSchemaRepair class must be defined and importable in the same module or imported from another module",
        "RemarkableAuth authentication credentials must be configured (likely API tokens or device tokens for reMarkable cloud access)",
        "Network access to reMarkable cloud services for uploading repaired docSchema",
        "Valid reMarkable account with existing root.docSchema file to repair"
      ],
      "source_code": "def main():\n    \"\"\"Run the root.docSchema repair\"\"\"\n    try:\n        repair_tool = RootDocSchemaRepair()\n        \n        print(\"\ud83d\udd27 reMarkable Root DocSchema Repair Tool\")\n        print(\"=\" * 50)\n        print(\"This tool will fix the corrupted root.docSchema by:\")\n        print(\"  \u2705 Preserving working entries (folders + invoice PDFs)\")\n        print(\"  \ud83d\udd27 Recalculating correct sizes for broken documents\")\n        print(\"  \u2b06\ufe0f  Uploading the corrected root.docSchema\")\n        print()\n        \n        # First run in dry-run mode to show what will be done\n        print(\"\ud83d\udd0d Running DRY RUN first to analyze the repair plan...\")\n        dry_run_success = repair_tool.run_repair(dry_run=True)\n        \n        if dry_run_success:\n            print(\"\\n\" + \"=\" * 50)\n            response = input(\"\ud83d\ude80 Dry run successful! Apply the repair? (yes/no): \").strip().lower()\n            \n            if response in ['yes', 'y']:\n                print(\"\ud83d\ude80 Applying the repair...\")\n                return repair_tool.run_repair(dry_run=False)\n            else:\n                print(\"\u274c Repair cancelled by user\")\n                return False\n        else:\n            print(\"\u274c Dry run failed - cannot proceed with repair\")\n            return False\n            \n    except Exception as e:\n        print(f\"\u274c Repair tool failed to initialize: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/fix_root_docschema.py",
      "tags": [
        "cli",
        "repair-tool",
        "remarkable",
        "docschema",
        "interactive",
        "dry-run",
        "file-repair",
        "user-confirmation",
        "error-handling",
        "cloud-sync"
      ],
      "updated_at": "2025-12-07T01:56:14.688486",
      "usage_example": "if __name__ == '__main__':\n    # Run the repair tool interactively\n    success = main()\n    \n    if success:\n        print('Repair completed successfully!')\n        exit(0)\n    else:\n        print('Repair failed or was cancelled')\n        exit(1)"
    },
    {
      "best_practices": [
        "This function should only be used for testing and simulation purposes, not for actual production uploads",
        "Ensure the FixedUploadTest class is defined and importable before calling this function; the function creates the instance itself",
        "Review the generated JSON results file to understand what fixes were applied and verified",
        "The function creates a timestamped results file to avoid overwriting previous test runs",
        "Always check the return value to determine if the system is ready for real uploads",
        "The function handles exceptions gracefully and returns False on failure, making it safe to use in automated testing pipelines",
        "Ensure sufficient disk space is available for the test_results directory before running"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:34:45",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "json",
        "time"
      ],
      "description": "Executes a simulation-only test of a fixed upload process for reMarkable documents, verifying that all critical fixes are correctly applied without making actual API calls.",
      "docstring": "Run the fixed upload test - SIMULATION ONLY",
      "id": 2067,
      "imports": [
        "import os",
        "import json",
        "import time",
        "import uuid",
        "import hashlib",
        "import base64",
        "import binascii",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import Any",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "imports_required": [
        "import json",
        "import time",
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 393,
      "line_start": 349,
      "name": "main_v63",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a comprehensive test harness for validating document upload fixes in a reMarkable integration. It simulates the upload process, verifies that all fixes are properly applied, saves detailed results to a JSON file, and provides a summary report. The function is designed to ensure that the upload logic is correct before attempting real API calls, reducing the risk of errors and failed uploads.",
      "return_annotation": null,
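      "return_explained": "Returns the result of test.verify_fixes_applied(results): a boolean value indicating whether all fixes were successfully verified (True) or if some fixes need review (False). Also returns False if any exception is raised during the simulation, verification, or while saving the results file. This return value can be used to determine if the system is ready for real uploads.",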
      "settings_required": [
        "Requires a FixedUploadTest class to be defined in the same module or imported",
        "Requires write permissions to create a 'test_results' directory inside the directory that contains the script",
        "The FixedUploadTest class must implement simulate_fixed_upload() and verify_fixes_applied() methods"
      ],
      "source_code": "def main():\n    \"\"\"Run the fixed upload test - SIMULATION ONLY\"\"\"\n    \n    try:\n        print(\"\ud83e\uddea FIXED UPLOAD TEST - SIMULATION ONLY\")\n        print(\"=\" * 60)\n        print(\"\ud83d\udeab NO ACTUAL API CALLS - TESTING FIXES ONLY\")\n        \n        # Initialize test\n        test = FixedUploadTest()\n        \n        # Simulate fixed upload\n        results = test.simulate_fixed_upload(\"Real_App_Behavior_Test\")\n        \n        # Verify fixes\n        fixes_verified = test.verify_fixes_applied(results)\n        \n        # Save results\n        results_file = Path(__file__).parent / \"test_results\" / f\"fixed_upload_simulation_{int(time.time())}.json\"\n        results_file.parent.mkdir(exist_ok=True)\n        \n        with open(results_file, 'w') as f:\n            json.dump(results, f, indent=2, default=str)\n        \n        print(f\"\\n\ud83d\udcbe Simulation results saved to: {results_file}\")\n        \n        # Summary\n        print(f\"\\n\ud83d\udccb SUMMARY:\")\n        print(f\"   All fixes applied: {'\u2705 YES' if fixes_verified else '\u274c NO'}\")\n        print(f\"   Components created: {len(results['upload_requests'])}\")\n        print(f\"   Ready for real upload: {'\u2705 YES' if fixes_verified else '\u274c NO'}\")\n        \n        if fixes_verified:\n            print(f\"\\n\ud83c\udfaf READY FOR REAL UPLOAD!\")\n            print(f\"   The simulated upload shows all critical fixes are correctly applied.\")\n            print(f\"   This should produce visible documents that match real app behavior.\")\n        else:\n            print(f\"\\n\u26a0\ufe0f FIXES NEED REVIEW\")\n            print(f\"   Some fixes were not applied correctly.\")\n        \n        return fixes_verified\n        \n    except Exception as e:\n        print(f\"\u274c Fixed upload test failed: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/fixed_upload_test.py",
      "tags": [
        "testing",
        "simulation",
        "upload",
        "remarkable",
        "validation",
        "document-processing",
        "integration-testing",
        "fix-verification",
        "file-io",
        "json"
      ],
      "updated_at": "2025-12-07T01:56:14.687726",
      "usage_example": "if __name__ == '__main__':\n    # Run the fixed upload simulation test\n    success = main()\n    \n    if success:\n        print('All tests passed, ready for production upload')\n    else:\n        print('Tests failed, review fixes before proceeding')\n    \n    # Exit with appropriate status code\n    import sys\n    sys.exit(0 if success else 1)"
    },
    {
      "best_practices": [
        "This function is designed to be called as the main entry point of the script",
        "Ensure RealAppUploadAnalyzer class is properly implemented before calling this function",
        "The function handles exceptions gracefully and provides user-friendly console output",
        "Results are automatically saved to files, ensure proper file system permissions",
        "The hardcoded document name 'Pylontech force H3 datasheet' suggests this is for a specific test case",
        "Consider parameterizing the document name if this function needs to be reused for different documents",
        "The function returns a boolean for easy integration with exit codes in scripts"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:33:42",
      "decorators": [],
      "dependencies": [
        "requests",
        "pathlib"
      ],
      "description": "Entry point function that orchestrates the analysis of a document uploaded through a reMarkable app, saves results and logs, and reports success or failure.",
      "docstring": "Run the real app upload analysis",
      "id": 2065,
      "imports": [
        "import os",
        "import json",
        "import time",
        "from pathlib import Path",
        "import requests",
        "from auth import RemarkableAuth",
        "import re"
      ],
      "imports_required": [
        "import os",
        "import json",
        "import time",
        "from pathlib import Path",
        "import requests",
        "from auth import RemarkableAuth",
        "import re"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 407,
      "line_start": 384,
      "name": "main_v62",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main execution entry point for analyzing real app document uploads to the reMarkable cloud. It instantiates a RealAppUploadAnalyzer, analyzes a specific document ('Pylontech force H3 datasheet'), saves the analysis results and raw logs, and provides user feedback through console output. The primary use case is to understand the correct pattern for root.docSchema sizes by analyzing actual app upload behavior.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value indicating success (True) or failure (False) of the analysis operation. Returns True if the analysis completed successfully and results['success'] is True, otherwise returns False. Also returns False if any exception occurs during execution.",
      "settings_required": [
        "RealAppUploadAnalyzer class must be defined and available in the same module or imported",
        "auth module with RemarkableAuth class must be available",
        "reMarkable cloud authentication credentials (likely required by RemarkableAuth)",
        "Network access to reMarkable cloud services",
        "Write permissions for saving analysis results and logs"
      ],
      "source_code": "def main():\n    \"\"\"Run the real app upload analysis\"\"\"\n    try:\n        analyzer = RealAppUploadAnalyzer()\n        \n        # Analyze the document uploaded by the real app\n        results = analyzer.analyze_real_app_document(\"Pylontech force H3 datasheet\")\n        \n        # Save results\n        analyzer.save_analysis_results(results)\n        analyzer.save_raw_logs()\n        \n        if results['success']:\n            print(f\"\\n\ud83c\udf89 Analysis Complete!\")\n            print(f\"\u2705 Successfully analyzed '{results['document_name']}'\")\n            print(f\"\ud83d\udcca This shows us the correct pattern for root.docSchema sizes\")\n        else:\n            print(f\"\\n\u274c Analysis failed: {results.get('error', 'Unknown error')}\")\n        \n        return results['success']\n        \n    except Exception as e:\n        print(f\"\u274c Failed to run analysis: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_real_app_upload.py",
      "tags": [
        "entry-point",
        "orchestration",
        "remarkable",
        "document-analysis",
        "cloud-upload",
        "logging",
        "error-handling",
        "main-function"
      ],
      "updated_at": "2025-12-07T01:56:14.687083",
      "usage_example": "if __name__ == '__main__':\n    success = main()\n    if success:\n        print('Analysis completed successfully')\n    else:\n        print('Analysis failed')\n    exit(0 if success else 1)"
    },
    {
      "best_practices": [
        "This function modifies sys.path (prepending the directory that contains the script) to enable local imports; ensure auth.py is located alongside the script",
        "The function is designed for testing purposes and may not be suitable for production use without additional error handling",
        "Ensure proper cleanup of sys.path modifications if using this in a larger application context",
        "The hardcoded output directory name 'remarkable_replica_v2' should be parameterized for production use",
        "Consider wrapping the function call in a try-except block to handle unexpected exceptions",
        "The debug print statement for session type should be removed or converted to proper logging in production",
        "Ensure RemarkableReplicaBuilder is properly imported or defined before calling this function"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "always needed for sys.path manipulation to enable local imports",
          "import": "import sys",
          "optional": false
        },
        {
          "condition": "imported inside the function after the script's own directory is prepended to sys.path; requires auth.py in the same directory as the script",
          "import": "from auth import RemarkableAuth",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:28:07",
      "decorators": [],
      "dependencies": [
        "requests",
        "pathlib",
        "sys",
        "PyPDF2",
        "shutil",
        "subprocess",
        "re",
        "json",
        "os",
        "logging",
        "typing",
        "dataclasses",
        "datetime"
      ],
      "description": "Entry point function that authenticates with Remarkable cloud service and builds a complete local replica of the user's Remarkable documents and notebooks.",
      "docstring": "Main function for testing",
      "id": 2047,
      "imports": [
        "import os",
        "import json",
        "import requests",
        "import logging",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Optional",
        "from typing import Any",
        "from typing import Set",
        "from dataclasses import dataclass",
        "from dataclasses import asdict",
        "from datetime import datetime",
        "import sys",
        "from auth import RemarkableAuth",
        "import re",
        "import shutil",
        "import subprocess",
        "import PyPDF2",
        "import shutil"
      ],
      "imports_required": [
        "from pathlib import Path"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 908,
      "line_start": 887,
      "name": "main_v61",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a test harness and main execution point for the Remarkable replica building system. It handles authentication through RemarkableAuth, obtains an authenticated session, and uses RemarkableReplicaBuilder to create a local copy of all Remarkable cloud content. The function includes error handling for authentication failures and debug output for session verification.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value indicating success or failure of the replica building process. Returns False explicitly if authentication fails, otherwise returns the success status from builder.build_complete_replica(). Type: bool",
      "settings_required": [
        "auth.py module containing the RemarkableAuth class must exist in the same directory as the script (the directory the function adds to sys.path)",
        "RemarkableReplicaBuilder class must be defined in the same module or imported",
        "Remarkable cloud service credentials (handled by RemarkableAuth)",
        "Network connectivity to Remarkable cloud services",
        "Write permissions for creating 'remarkable_replica_v2' directory"
      ],
      "source_code": "def main():\n    \"\"\"Main function for testing\"\"\"\n    import sys\n    sys.path.insert(0, str(Path(__file__).parent))\n    \n    from auth import RemarkableAuth\n    \n    # Authenticate and get session\n    auth = RemarkableAuth()\n    session = auth.get_authenticated_session()\n    \n    if not session:\n        print(\"\u274c Authentication failed\")\n        return False\n    \n    print(f\"Session type: {type(session)}\")  # Debug line\n    \n    # Build replica\n    builder = RemarkableReplicaBuilder(session, \"remarkable_replica_v2\")\n    success = builder.build_complete_replica()\n    \n    return success",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/local_replica_v2.py",
      "tags": [
        "testing",
        "authentication",
        "remarkable",
        "cloud-sync",
        "replica",
        "entry-point",
        "main-function",
        "document-management",
        "e-ink",
        "notebook-sync"
      ],
      "updated_at": "2025-12-07T01:56:14.686406",
      "usage_example": "if __name__ == '__main__':\n    success = main()\n    if success:\n        print('\u2705 Replica built successfully')\n    else:\n        print('\u274c Replica building failed')\n    import sys\n    sys.exit(0 if success else 1)"
    },
    {
      "best_practices": [
        "This function is designed to be called as a script entry point, typically from if __name__ == '__main__' block",
        "The function returns proper exit codes (0 for success, 1 for failure) suitable for shell scripting and CI/CD integration",
        "Handles KeyboardInterrupt separately to allow graceful user cancellation",
        "Uses try-except blocks to catch all exceptions and prevent unhandled errors",
        "Provides clear console feedback with emoji indicators for better user experience",
        "Should be used in conjunction with proper logging configuration for production environments",
        "The function instantiates RemarkableReplicaSync() itself with no arguments, so any configuration the class requires must already be in place before calling this function"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:26:27",
      "decorators": [],
      "dependencies": [
        "requests"
      ],
      "description": "Main entry point function that orchestrates a standalone synchronization process for reMarkable Replica, handling initialization, execution, and error reporting.",
      "docstring": "Main entry point for standalone sync",
      "id": 2042,
      "imports": [
        "import os",
        "import sys",
        "import json",
        "import time",
        "import hashlib",
        "import requests",
        "import logging",
        "import re",
        "import shutil",
        "import subprocess",
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Optional",
        "from typing import List",
        "from typing import Set",
        "from dataclasses import dataclass"
      ],
      "imports_required": [
        "from pathlib import Path",
        "from datetime import datetime",
        "from typing import Dict, Any, Optional, List, Set",
        "from dataclasses import dataclass",
        "import os",
        "import sys",
        "import json",
        "import time",
        "import hashlib",
        "import requests",
        "import logging",
        "import re",
        "import shutil",
        "import subprocess"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 471,
      "line_start": 448,
      "name": "main_v60",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the command-line entry point for the reMarkable Replica sync tool. It initializes the RemarkableReplicaSync class, performs the synchronization operation, and provides user-friendly console feedback with emoji indicators. It handles graceful shutdown on keyboard interrupts and catches all exceptions to provide appropriate exit codes for shell integration.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 for successful sync completion, 1 for failures (including user interruption, sync failures, or exceptions). This follows Unix convention where 0 indicates success and non-zero indicates failure.",
      "settings_required": [
        "RemarkableReplicaSync class must be defined and importable in the same module or imported from another module",
        "RemarkableReplicaSync may require configuration such as API credentials, file paths, or connection settings (depends on class implementation)",
        "Appropriate file system permissions for sync operations",
        "Network connectivity if syncing with remote reMarkable device or cloud service"
      ],
      "source_code": "def main():\n    \"\"\"Main entry point for standalone sync\"\"\"\n    try:\n        print(\"\ud83d\udd04 Starting reMarkable Replica Sync\")\n        \n        # Initialize sync tool\n        sync = RemarkableReplicaSync()\n        \n        # Perform sync\n        success = sync.sync_replica()\n        \n        if success:\n            print(\"\u2705 Sync completed successfully!\")\n            return 0\n        else:\n            print(\"\u274c Sync failed!\")\n            return 1\n            \n    except KeyboardInterrupt:\n        print(\"\\n\u26a0\ufe0f Sync interrupted by user\")\n        return 1\n    except Exception as e:\n        print(f\"\u274c Sync failed with error: {e}\")\n        return 1",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/sync_replica_new.py",
      "tags": [
        "entry-point",
        "sync",
        "remarkable",
        "cli",
        "error-handling",
        "standalone",
        "orchestration",
        "exit-code"
      ],
      "updated_at": "2025-12-07T01:56:14.685231",
      "usage_example": "if __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "Always run this function using asyncio.run(main()) or await it from another async context",
        "Ensure all three demo functions (demo_graphics_generation, demo_placeholder_parsing, demo_hybrid_response) are properly defined before calling main()",
        "The function includes comprehensive error handling with traceback printing for debugging",
        "Console output uses Unicode emoji characters; ensure terminal supports UTF-8 encoding",
        "This is a demonstration function and should not be used in production code without modification",
        "The function expects specific module dependencies to be available; verify all imports resolve correctly",
        "Consider wrapping the asyncio.run(main()) call in a if __name__ == '__main__': block when using as a script entry point"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only when an exception occurs during demo execution",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:03:16",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "tempfile",
        "pathlib",
        "json",
        "traceback"
      ],
      "description": "Orchestrates a comprehensive demonstration of E-Ink LLM hybrid mode capabilities, running three sequential demos showcasing graphics generation, placeholder parsing, and complete hybrid response processing.",
      "docstring": "Run all demos",
      "id": 1989,
      "imports": [
        "import asyncio",
        "import tempfile",
        "from pathlib import Path",
        "import json",
        "from graphics_generator import GraphicsGenerator",
        "from graphics_generator import GraphicSpec",
        "from graphics_generator import GraphicType",
        "from hybrid_response_handler import HybridResponseHandler",
        "from hybrid_pdf_generator import HybridPDFGenerator",
        "from hybrid_response_handler import HybridResponseHandler",
        "import traceback"
      ],
      "imports_required": [
        "import asyncio",
        "import tempfile",
        "from pathlib import Path",
        "import json",
        "from graphics_generator import GraphicsGenerator, GraphicSpec, GraphicType",
        "from hybrid_response_handler import HybridResponseHandler",
        "from hybrid_pdf_generator import HybridPDFGenerator"
      ],
      "is_async": 1,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 233,
      "line_start": 207,
      "name": "main_v59",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This async function serves as the main entry point for demonstrating the E-Ink LLM Assistant's hybrid text+graphics mode. It sequentially executes three demos: individual graphics generation, placeholder parsing, and complete hybrid response processing. The function provides user-friendly console output with progress indicators and error handling, and concludes with usage instructions for implementing hybrid mode in production.",
      "return_annotation": null,
      "return_explained": "Returns None (implicitly). The function is designed for side effects (console output and demonstration execution) rather than returning values. On success, it prints completion messages and usage instructions. On failure, it prints error messages and stack traces.",
      "settings_required": [
        "Requires demo_graphics_generation() async function to be defined in the same module",
        "Requires demo_placeholder_parsing() function to be defined in the same module",
        "Requires demo_hybrid_response() async function to be defined in the same module",
        "Requires graphics_generator module with GraphicsGenerator, GraphicSpec, and GraphicType classes",
        "Requires hybrid_response_handler module with HybridResponseHandler class",
        "Requires hybrid_pdf_generator module with HybridPDFGenerator class",
        "May require additional configuration for graphics generation (fonts, image libraries, etc.)"
      ],
      "source_code": "async def main():\n    \"\"\"Run all demos\"\"\"\n    print(\"\ud83d\ude80 E-Ink LLM Hybrid Mode Demonstration\")\n    print(\"=\" * 60)\n    print(\"This demo showcases the new hybrid text+graphics capabilities\")\n    print()\n    \n    try:\n        # Demo 1: Individual graphics generation\n        graphics = await demo_graphics_generation()\n        \n        # Demo 2: Placeholder parsing\n        demo_placeholder_parsing()\n        \n        # Demo 3: Complete hybrid response processing\n        await demo_hybrid_response()\n        \n        print(\"\\n\" + \"=\" * 60)\n        print(\"\u2705 Demo completed successfully!\")\n        print(\"\\nTo use hybrid mode in your E-Ink LLM Assistant:\")\n        print(\"   python main.py --file input.pdf --enable-hybrid-mode\")\n        print(\"\\nFor more information, see HYBRID_MODE_GUIDE.md\")\n        \n    except Exception as e:\n        print(f\"\\n\u274c Demo failed with error: {e}\")\n        import traceback\n        traceback.print_exc()",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/demo_hybrid_mode.py",
      "tags": [
        "async",
        "demo",
        "orchestration",
        "e-ink",
        "hybrid-mode",
        "graphics",
        "llm",
        "presentation",
        "error-handling",
        "main-entry-point"
      ],
      "updated_at": "2025-12-07T01:56:14.684565",
      "usage_example": "import asyncio\n\nasync def demo_graphics_generation():\n    # Your graphics demo implementation\n    return []\n\ndef demo_placeholder_parsing():\n    # Your placeholder parsing demo\n    pass\n\nasync def demo_hybrid_response():\n    # Your hybrid response demo\n    pass\n\nasync def main():\n    \"\"\"Run all demos\"\"\"\n    print(\"\ud83d\ude80 E-Ink LLM Hybrid Mode Demonstration\")\n    print(\"=\" * 60)\n    print(\"This demo showcases the new hybrid text+graphics capabilities\")\n    print()\n    \n    try:\n        graphics = await demo_graphics_generation()\n        demo_placeholder_parsing()\n        await demo_hybrid_response()\n        \n        print(\"\\n\" + \"=\" * 60)\n        print(\"\u2705 Demo completed successfully!\")\n        print(\"\\nTo use hybrid mode in your E-Ink LLM Assistant:\")\n        print(\"   python main.py --file input.pdf --enable-hybrid-mode\")\n        print(\"\\nFor more information, see HYBRID_MODE_GUIDE.md\")\n        \n    except Exception as e:\n        print(f\"\\n\u274c Demo failed with error: {e}\")\n        import traceback\n        traceback.print_exc()\n\nif __name__ == \"__main__\":\n    asyncio.run(main())"
    },
    {
      "best_practices": [
        "This function must be called using asyncio.run(main()) or within an existing async context",
        "Ensure test_remarkable_with_code() and test_remarkable_authentication() functions are defined before calling main()",
        "Command-line arguments must follow the exact format: --code <one_time_code>",
        "The function uses sys.exit(1) on failure, which will terminate the entire program",
        "Both test functions should handle their own exceptions and return boolean success status",
        "The function expects emoji characters in output - ensure terminal supports UTF-8 encoding",
        "This is designed as a test/validation script, not for production use in larger applications"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 23:52:16",
      "decorators": [],
      "dependencies": [
        "asyncio",
        "sys",
        "pathlib",
        "remarkable_cloud",
        "rmcl"
      ],
      "description": "Asynchronous main test function that validates reMarkable Cloud integration by either testing with a one-time authentication code or existing authentication credentials.",
      "docstring": "Main test function",
      "id": 1963,
      "imports": [
        "import asyncio",
        "import sys",
        "from pathlib import Path",
        "from remarkable_cloud import RemarkableCloudManager",
        "import rmcl"
      ],
      "imports_required": [
        "import asyncio",
        "import sys",
        "from pathlib import Path",
        "from remarkable_cloud import RemarkableCloudManager",
        "import rmcl"
      ],
      "is_async": 1,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 114,
      "line_start": 98,
      "name": "main_v58",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the entry point for testing reMarkable Cloud connectivity and authentication. It provides two testing modes: (1) authentication using a one-time code passed as a command-line argument, or (2) testing with existing stored authentication. The function validates the integration and provides user-friendly feedback on success or failure, exiting with appropriate status codes.",
      "return_annotation": null,
      "return_explained": "No explicit return value (returns None implicitly). The function communicates results through console output and system exit codes. Exits with code 1 on failure, continues normally (exit code 0) on success.",
      "settings_required": [
        "reMarkable Cloud authentication credentials (either existing stored credentials or a one-time authentication code)",
        "test_remarkable_with_code() function must be defined in the same module",
        "test_remarkable_authentication() function must be defined in the same module",
        "Both test functions must be async and return boolean success status"
      ],
      "source_code": "async def main():\n    \"\"\"Main test function\"\"\"\n    if len(sys.argv) > 1 and sys.argv[1] == \"--code\" and len(sys.argv) > 2:\n        # Test with one-time code\n        one_time_code = sys.argv[2]\n        success = await test_remarkable_with_code(one_time_code)\n    else:\n        # Test with existing authentication\n        success = await test_remarkable_authentication()\n    \n    if success:\n        print(\"\\n\ud83c\udf89 reMarkable Cloud integration test passed!\")\n        print(\"   You can now use the E-Ink LLM Assistant with reMarkable Cloud.\")\n    else:\n        print(\"\\n\u274c reMarkable Cloud integration test failed.\")\n        print(\"   Please check the error messages above.\")\n        sys.exit(1)",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/test_remarkable.py",
      "tags": [
        "async",
        "testing",
        "authentication",
        "remarkable",
        "cloud-integration",
        "cli",
        "command-line",
        "entry-point",
        "validation",
        "e-ink"
      ],
      "updated_at": "2025-12-07T01:56:14.683899",
      "usage_example": "# Run with existing authentication:\n# python script.py\n\n# Run with one-time code:\n# python script.py --code YOUR_ONE_TIME_CODE\n\n# In code:\nimport asyncio\nimport sys\nfrom pathlib import Path\nfrom remarkable_cloud import RemarkableCloudManager\nimport rmcl\n\n# Define required test functions first\nasync def test_remarkable_with_code(code):\n    # Implementation here\n    return True\n\nasync def test_remarkable_authentication():\n    # Implementation here\n    return True\n\nasync def main():\n    if len(sys.argv) > 1 and sys.argv[1] == \"--code\" and len(sys.argv) > 2:\n        one_time_code = sys.argv[2]\n        success = await test_remarkable_with_code(one_time_code)\n    else:\n        success = await test_remarkable_authentication()\n    \n    if success:\n        print(\"\\n\ud83c\udf89 reMarkable Cloud integration test passed!\")\n        print(\"   You can now use the E-Ink LLM Assistant with reMarkable Cloud.\")\n    else:\n        print(\"\\n\u274c reMarkable Cloud integration test failed.\")\n        print(\"   Please check the error messages above.\")\n        sys.exit(1)\n\nif __name__ == \"__main__\":\n    asyncio.run(main())"
    },
    {
      "best_practices": [
        "Ensure all required constants (OUTPUT_FOLDER, WUXI2_FOLDER, RESULTS_FILE, DETAILED_JSON) are properly defined before calling this function",
        "Verify that all helper functions (scan_output_folder, scan_wuxi2_folder, compare_documents, save_results, print_summary) are implemented and available",
        "Ensure the directories specified in OUTPUT_FOLDER and WUXI2_FOLDER exist and are accessible",
        "Verify write permissions for the output file paths (RESULTS_FILE and DETAILED_JSON)",
        "The function exits early if no coded documents are found in the output folder, so ensure the output folder contains expected documents",
        "Consider wrapping the main() call in a try-except block to handle potential file system errors or missing dependencies",
        "This function is designed to be called as the entry point, typically within an if __name__ == '__main__': block"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 22:24:17",
      "decorators": [],
      "dependencies": [
        "os",
        "re",
        "hashlib",
        "pathlib",
        "typing",
        "csv",
        "datetime",
        "collections",
        "json"
      ],
      "description": "Main execution function that orchestrates a document comparison workflow between two directories (mailsearch/output and wuxi2 repository), scanning for coded documents, comparing them, and generating results.",
      "docstring": "Main execution function",
      "id": 1868,
      "imports": [
        "import os",
        "import re",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import List",
        "from typing import Tuple",
        "from typing import Optional",
        "import csv",
        "from datetime import datetime",
        "from collections import defaultdict",
        "import json"
      ],
      "imports_required": [
        "import os",
        "import re",
        "import hashlib",
        "from pathlib import Path",
        "from typing import Dict, List, Tuple, Optional",
        "import csv",
        "from datetime import datetime",
        "from collections import defaultdict",
        "import json"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 440,
      "line_start": 412,
      "name": "main_v57",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the entry point for a document comparison tool. It coordinates the entire workflow: scanning the output folder for documents, scanning the wuxi2 repository, comparing documents between the two locations, saving comparison results to files, and printing a summary. It's designed to identify differences, matches, or discrepancies between document sets in different locations.",
      "return_annotation": null,
      "return_explained": "Returns None (implicit). The function performs side effects including printing to console, writing results to files (RESULTS_FILE and DETAILED_JSON), and potentially creating output directories.",
      "settings_required": [
        "OUTPUT_FOLDER constant must be defined in the module scope pointing to the mailsearch/output directory",
        "WUXI2_FOLDER constant must be defined in the module scope pointing to the wuxi2 repository directory",
        "RESULTS_FILE constant must be defined specifying the path for CSV results output",
        "DETAILED_JSON constant must be defined specifying the path for JSON results output",
        "scan_output_folder() function must be defined and available in the same module",
        "scan_wuxi2_folder() function must be defined and available in the same module",
        "compare_documents() function must be defined and available in the same module",
        "save_results() function must be defined and available in the same module",
        "print_summary() function must be defined and available in the same module",
        "Read/write permissions for OUTPUT_FOLDER, WUXI2_FOLDER, and output file paths"
      ],
      "source_code": "def main():\n    \"\"\"Main execution function\"\"\"\n    print(f\"\\n{'='*80}\")\n    print(\"Document Comparison Tool\")\n    print(\"Comparing mailsearch/output with wuxi2 repository\")\n    print(f\"{'='*80}\")\n    \n    # Scan output folder\n    output_docs = scan_output_folder(OUTPUT_FOLDER)\n    \n    if not output_docs:\n        print(\"\\n\u2717 No coded documents found in output folder!\")\n        return\n    \n    # Scan wuxi2 repository\n    wuxi2_docs = scan_wuxi2_folder(WUXI2_FOLDER)\n    \n    # Compare documents\n    results = compare_documents(output_docs, wuxi2_docs)\n    \n    # Save results\n    save_results(results, RESULTS_FILE, DETAILED_JSON)\n    \n    # Print summary\n    print_summary(results)\n    \n    print(f\"{'='*80}\")\n    print(\"Comparison complete!\")\n    print(f\"{'='*80}\\n\")",
      "source_file": "/tf/active/vicechatdev/mailsearch/compare_documents.py",
      "tags": [
        "document-comparison",
        "file-scanning",
        "workflow-orchestration",
        "main-entry-point",
        "batch-processing",
        "reporting",
        "file-system",
        "comparison-tool"
      ],
      "updated_at": "2025-12-07T01:56:14.683179",
      "usage_example": "# Define required constants and helper functions first\nOUTPUT_FOLDER = './mailsearch/output'\nWUXI2_FOLDER = './wuxi2'\nRESULTS_FILE = './comparison_results.csv'\nDETAILED_JSON = './comparison_results.json'\n\n# Define required helper functions (scan_output_folder, scan_wuxi2_folder, etc.)\n# ... (implementation of helper functions)\n\n# Execute the main function\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "This function expects test functions to be defined in the same module scope before calling main()",
        "Each test function should return a boolean (True for pass, False for fail) or raise an exception",
        "The function uses print statements for output; consider redirecting stdout if capturing test results programmatically",
        "Exit codes follow Unix convention: use sys.exit(main()) to properly terminate the process with the correct code",
        "Test functions are executed sequentially; a failed test does not stop subsequent tests from running",
        "Exceptions in test functions are caught and counted as failures, allowing the test suite to continue",
        "The setup_logging() function is called before tests run; ensure logging configuration is appropriate for test environment"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 17:37:01",
      "decorators": [],
      "dependencies": [
        "logging",
        "sys",
        "os",
        "pathlib"
      ],
      "description": "Orchestrates and executes a test suite for an email forwarder service, running multiple test functions sequentially and reporting results.",
      "docstring": "Run all tests.",
      "id": 1478,
      "imports": [
        "import sys",
        "import os",
        "import logging",
        "from pathlib import Path",
        "from config import settings",
        "from utils.logger import setup_logging",
        "from forwarder.o365_client import O365Client",
        "from forwarder.email_handler import EmailHandler"
      ],
      "imports_required": [
        "import sys",
        "import os",
        "import logging",
        "from pathlib import Path",
        "from config import settings",
        "from utils.logger import setup_logging",
        "from forwarder.o365_client import O365Client",
        "from forwarder.email_handler import EmailHandler"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 130,
      "line_start": 96,
      "name": "main_v56",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for testing an email forwarding service. It sets up logging, executes a predefined list of test functions (configuration, O365 connection, email handler, and send email tests), tracks pass/fail status, and provides a summary report. Returns 0 for success (all tests passed) or 1 for failure (some tests failed).",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 if all tests passed successfully, 1 if any tests failed. This follows standard Unix convention for process exit codes where 0 indicates success.",
      "settings_required": [
        "config.py module with settings object must be available",
        "utils.logger module with setup_logging function must be available",
        "forwarder.o365_client module with O365Client class must be available",
        "forwarder.email_handler module with EmailHandler class must be available",
        "Test functions must be defined in the same module: test_configuration, test_o365_connection, test_email_handler, test_send_email",
        "Each test function should return True for pass, False for fail, or raise an exception on error"
      ],
      "source_code": "def main():\n    \"\"\"Run all tests.\"\"\"\n    print(\"Email Forwarder Service Test Suite\")\n    print(\"=\" * 40)\n    \n    # Setup logging\n    setup_logging()\n    \n    tests = [\n        test_configuration,\n        test_o365_connection,\n        test_email_handler,\n        test_send_email\n    ]\n    \n    passed = 0\n    total = len(tests)\n    \n    for test in tests:\n        try:\n            if test():\n                passed += 1\n        except Exception as e:\n            print(f\"\u2717 Test failed with exception: {e}\")\n        print()\n    \n    print(\"=\" * 40)\n    print(f\"Tests passed: {passed}/{total}\")\n    \n    if passed == total:\n        print(\"All tests passed! \u2713\")\n        return 0\n    else:\n        print(\"Some tests failed! \u2717\")\n        return 1",
      "source_file": "/tf/active/vicechatdev/email-forwarder/test_service.py",
      "tags": [
        "testing",
        "test-suite",
        "email",
        "forwarder",
        "integration-tests",
        "test-runner",
        "o365",
        "logging",
        "exit-code"
      ],
      "updated_at": "2025-12-07T01:56:14.682531",
      "usage_example": "# Assuming all required modules and test functions are defined\n# in the same file or imported\n\ndef test_configuration():\n    \"\"\"Example test function.\"\"\"\n    return True\n\ndef test_o365_connection():\n    \"\"\"Example test function.\"\"\"\n    return True\n\ndef test_email_handler():\n    \"\"\"Example test function.\"\"\"\n    return True\n\ndef test_send_email():\n    \"\"\"Example test function.\"\"\"\n    return True\n\nif __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "This function expects three test functions to be defined in the same module: test_pyodbc_import, test_odbc_driver, and test_connection_string",
        "Each test function should return a boolean indicating success (True) or failure (False)",
        "Test functions should handle their own exceptions and print appropriate messages",
        "The function is designed to be used as a standalone diagnostic tool, typically called from if __name__ == '__main__' block",
        "Return value can be used to set exit codes for CI/CD pipelines (0 for success, 1 for failure)",
        "All test exceptions are caught and treated as test failures, ensuring the suite completes even if individual tests crash",
        "The function provides user-friendly output with emojis and formatting for better readability in terminal environments"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 16:23:14",
      "decorators": [],
      "dependencies": [
        "pyodbc",
        "sqlalchemy"
      ],
      "description": "Orchestrates and executes a suite of ODBC connectivity tests for SQL Server, providing formatted output and a summary of test results.",
      "docstring": "Run all tests",
      "id": 1252,
      "imports": [
        "import pyodbc",
        "import sqlalchemy",
        "from sqlalchemy import create_engine",
        "from sqlalchemy import text",
        "import pyodbc"
      ],
      "imports_required": [
        "import pyodbc",
        "import sqlalchemy",
        "from sqlalchemy import create_engine",
        "from sqlalchemy import text"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 104,
      "line_start": 69,
      "name": "main_v55",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for testing SQL Server ODBC connectivity. It runs a predefined set of tests (pyodbc import, ODBC driver availability, and connection string validation), collects results, handles exceptions, and provides a comprehensive summary with helpful connection tips. It's designed to validate that the environment is properly configured for SQL Server database connections.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value indicating whether all tests passed. Returns True if all tests in the suite completed successfully, False if any test failed or raised an exception.",
      "settings_required": [
        "ODBC Driver 18 for SQL Server must be installed on the system",
        "SQL Server instance must be accessible (local or remote)",
        "Requires three test functions to be defined in the same module: test_pyodbc_import, test_odbc_driver, and test_connection_string"
      ],
      "source_code": "def main():\n    \"\"\"Run all tests\"\"\"\n    print(\"\ud83e\uddea SmartStat ODBC Connectivity Test\")\n    print(\"=\" * 40)\n    \n    tests = [\n        test_pyodbc_import,\n        test_odbc_driver,\n        test_connection_string\n    ]\n    \n    results = []\n    for test in tests:\n        try:\n            result = test()\n            results.append(result)\n        except Exception as e:\n            print(f\"\u274c Test failed with exception: {e}\")\n            results.append(False)\n    \n    print(\"\\n\" + \"=\" * 40)\n    print(\"\ud83d\udcca Test Summary:\")\n    print(f\"\u2705 Passed: {sum(results)}/{len(results)} tests\")\n    \n    if all(results):\n        print(\"\ud83c\udf89 All tests passed! SQL Server connectivity is ready.\")\n        print(\"\\n\ud83d\udca1 Connection Tips:\")\n        print(\"   - Use 'ODBC Driver 18 for SQL Server' as driver name\")\n        print(\"   - For local connections: server='localhost' or server='127.0.0.1'\")\n        print(\"   - For trusted connections: trusted_connection=yes\")\n        print(\"   - For SQL Auth: provide username and password\")\n        print(\"   - You may need TrustServerCertificate=yes for SSL issues\")\n    else:\n        print(\"\u26a0\ufe0f  Some tests failed. Check the errors above.\")\n    \n    return all(results)",
      "source_file": "/tf/active/vicechatdev/full_smartstat/test_odbc.py",
      "tags": [
        "testing",
        "odbc",
        "sql-server",
        "connectivity",
        "database",
        "validation",
        "diagnostics",
        "test-suite",
        "pyodbc",
        "sqlalchemy"
      ],
      "updated_at": "2025-12-07T01:56:14.681804",
      "usage_example": "# Ensure test functions are defined in the same module\ndef test_pyodbc_import():\n    try:\n        import pyodbc\n        print(\"\u2705 pyodbc imported successfully\")\n        return True\n    except ImportError:\n        print(\"\u274c pyodbc import failed\")\n        return False\n\ndef test_odbc_driver():\n    drivers = pyodbc.drivers()\n    if 'ODBC Driver 18 for SQL Server' in drivers:\n        print(\"\u2705 ODBC Driver 18 found\")\n        return True\n    print(\"\u274c ODBC Driver 18 not found\")\n    return False\n\ndef test_connection_string():\n    print(\"\u2705 Connection string test passed\")\n    return True\n\n# Run the main test suite\nif __name__ == '__main__':\n    success = main()\n    exit(0 if success else 1)"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point of a test script, typically with if __name__ == '__main__'",
        "The return value should be used with sys.exit() to properly signal test success/failure to the operating system",
        "All test functions called by main() must be defined before calling main()",
        "Test functions should raise AssertionError for test failures to be properly caught and reported",
        "Ensure all required modules (config, rag_engine, app) are available in the Python path before execution",
        "The function provides detailed console output, so redirect stdout/stderr appropriately in automated environments",
        "Consider running this in a test environment separate from production to avoid side effects"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "only used when an unexpected exception occurs during test execution",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 09:57:50",
      "decorators": [],
      "dependencies": [
        "sys",
        "json",
        "config",
        "rag_engine",
        "os",
        "app",
        "traceback"
      ],
      "description": "Test orchestration function that executes a comprehensive test suite for DocChat's multi-LLM model selection feature and reports results.",
      "docstring": "Run all tests",
      "id": 346,
      "imports": [
        "import sys",
        "import json",
        "import config",
        "from rag_engine import get_llm_instance",
        "import config",
        "import config",
        "import os",
        "import app as flask_app",
        "import traceback"
      ],
      "imports_required": [
        "import sys",
        "import json",
        "import config",
        "from rag_engine import get_llm_instance",
        "import os",
        "import app as flask_app",
        "import traceback"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 213,
      "line_start": 172,
      "name": "main_v54",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for running integration tests for the DocChat application. It sequentially executes tests for configuration, RAG engine, Flask routes, API endpoints, and frontend files. It provides formatted console output showing test progress and results, and returns an exit code indicating success (0) or failure (1) for use in CI/CD pipelines or test automation.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 if all tests pass successfully, 1 if any test fails (either through AssertionError or unexpected Exception). This follows Unix convention for process exit codes where 0 indicates success.",
      "settings_required": [
        "config.py module must be present with multi-LLM configuration settings",
        "rag_engine.py module must be available with get_llm_instance function",
        "app.py Flask application module must exist",
        "test_config(), test_rag_engine(), test_flask_routes(), test_api_models_endpoint(), and test_frontend_files() functions must be defined in the same module or imported",
        "Frontend files and API endpoints must be properly configured for testing"
      ],
      "source_code": "def main():\n    \"\"\"Run all tests\"\"\"\n    print(\"\\n\" + \"=\" * 60)\n    print(\"DocChat Multi-LLM Model Selection Tests\")\n    print(\"=\" * 60)\n    print()\n    \n    try:\n        test_config()\n        test_rag_engine()\n        test_flask_routes()\n        test_api_models_endpoint()\n        test_frontend_files()\n        \n        print(\"=\" * 60)\n        print(\"\u2705 ALL TESTS PASSED!\")\n        print(\"=\" * 60)\n        print()\n        print(\"Model selection is fully implemented and working:\")\n        print(\"  - Backend: \u2713 Multi-LLM support configured\")\n        print(\"  - API: \u2713 Model selection endpoint ready\")\n        print(\"  - Frontend: \u2713 UI with model dropdown\")\n        print(\"  - Integration: \u2713 Model parameter sent and used\")\n        print()\n        return 0\n        \n    except AssertionError as e:\n        print(\"=\" * 60)\n        print(\"\u274c TEST FAILED!\")\n        print(\"=\" * 60)\n        print(f\"Error: {e}\")\n        print()\n        return 1\n    except Exception as e:\n        print(\"=\" * 60)\n        print(\"\u274c UNEXPECTED ERROR!\")\n        print(\"=\" * 60)\n        print(f\"Error: {type(e).__name__}: {e}\")\n        import traceback\n        traceback.print_exc()\n        print()\n        return 1",
      "source_file": "/tf/active/vicechatdev/docchat/test_model_selection.py",
      "tags": [
        "testing",
        "integration-tests",
        "test-orchestration",
        "multi-llm",
        "test-runner",
        "ci-cd",
        "validation",
        "docchat",
        "model-selection"
      ],
      "updated_at": "2025-12-07T01:56:14.681151",
      "usage_example": "if __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point for testing the sync application before production use",
        "Ensure all configuration files and credentials are properly set up before running",
        "The function returns exit codes suitable for use with sys.exit() for proper process termination",
        "Review console output carefully as it provides detailed status and next steps",
        "This is a test function and should not be used for production synchronization - use main.py with appropriate flags instead",
        "The function depends on test_graph_client() and test_filecloud_integration() helper functions being defined in the same module",
        "Exception handling includes full stack trace printing for debugging purposes"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported inside try block during configuration loading phase",
          "import": "from config import Config",
          "optional": false
        },
        {
          "condition": "imported inside except block for error handling and stack trace printing",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 09:13:32",
      "decorators": [],
      "dependencies": [
        "datetime",
        "os",
        "sys",
        "traceback",
        "sharepoint_graph_client",
        "config",
        "sync_service"
      ],
      "description": "Main test function that validates SharePoint Graph API integration, tests the Graph client connection, and verifies FileCloud sync functionality.",
      "docstring": "Main test function.",
      "id": 214,
      "imports": [
        "import os",
        "import sys",
        "from datetime import datetime",
        "from sharepoint_graph_client import SharePointGraphClient",
        "from config import Config",
        "from sync_service import SharePointFileCloudSync",
        "from config import Config",
        "import traceback",
        "import traceback",
        "import traceback"
      ],
      "imports_required": [
        "from datetime import datetime"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 145,
      "line_start": 99,
      "name": "main_v53",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the primary entry point for testing the SharePoint to FileCloud synchronization application. It validates configuration, tests the Graph API client connection to SharePoint, verifies document retrieval, and tests the full sync integration with FileCloud. It provides detailed console output with status indicators and next steps for deployment.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 for successful completion of all tests (Graph API client test and sync integration test both passed), 1 for any failure (configuration error, Graph API test failure, sync integration failure, or exception). This follows standard Unix exit code conventions.",
      "settings_required": [
        "config.py module with Config class containing SHAREPOINT_SITE_URL",
        "config.py module with Config class containing FILECLOUD_SERVER_URL",
        "Config.validate_config() must pass validation",
        "Config.setup_logging() must be available",
        "SharePoint Graph API credentials configured",
        "FileCloud server credentials configured",
        "test_graph_client() function must be defined in the same module",
        "test_filecloud_integration() function must be defined in the same module"
      ],
      "source_code": "def main():\n    \"\"\"Main test function.\"\"\"\n    print(\"SharePoint Graph API Integration Test\")\n    print(\"=\" * 60)\n    print(f\"Test time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n    print()\n    \n    try:\n        # Load configuration\n        from config import Config\n        Config.validate_config()\n        Config.setup_logging()\n        \n        print(\"\u2705 Configuration loaded successfully\")\n        print(f\"SharePoint Site: {Config.SHAREPOINT_SITE_URL}\")\n        print(f\"FileCloud Server: {Config.FILECLOUD_SERVER_URL}\")\n        print()\n        \n        # Test Graph client\n        graph_success, doc_count = test_graph_client()\n        \n        if graph_success:\n            print(f\"\\n\ud83c\udf89 Graph API client test PASSED! Found {doc_count} documents.\")\n            \n            # Test full sync integration\n            sync_success = test_filecloud_integration()\n            \n            if sync_success:\n                print(\"\\n\ud83c\udf89 Full sync integration test PASSED!\")\n                print(\"\\n\u2705 The sync application is ready to use with Graph API!\")\n                print(\"\\nNext steps:\")\n                print(\"1. Run a test sync: python main.py --once\")\n                print(\"2. Monitor the logs for any issues\")\n                print(\"3. Set up continuous sync: python main.py\")\n                return 0\n            else:\n                print(\"\\n\u274c Sync integration test failed.\")\n                return 1\n        else:\n            print(\"\\n\u274c Graph API client test failed.\")\n            return 1\n            \n    except Exception as e:\n        print(f\"\u274c Test failed with exception: {e}\")\n        import traceback\n        traceback.print_exc()\n        return 1",
      "source_file": "/tf/active/vicechatdev/SPFCsync/test_graph_client.py",
      "tags": [
        "testing",
        "integration-test",
        "sharepoint",
        "graph-api",
        "filecloud",
        "sync",
        "validation",
        "configuration",
        "main-entry-point",
        "cli"
      ],
      "updated_at": "2025-12-07T01:56:14.680464",
      "usage_example": "if __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point for SharePoint connectivity testing",
        "Ensure all required configuration values are set in config.py before running",
        "The function returns exit codes suitable for use with sys.exit() for proper process termination",
        "Check that test_rest_client() function is properly defined before calling main()",
        "Review console output for detailed test results and error messages",
        "The function handles exceptions gracefully and provides informative error messages",
        "Use the return code to determine if the SharePoint sync service is ready for deployment"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported inside try block for configuration loading and validation",
          "import": "from config import Config",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 09:12:44",
      "decorators": [],
      "dependencies": [
        "datetime",
        "config",
        "sharepoint_rest_client"
      ],
      "description": "Main test function that validates SharePoint REST API connectivity by loading configuration, setting up logging, and executing REST client tests.",
      "docstring": "Main test function.",
      "id": 211,
      "imports": [
        "import os",
        "import sys",
        "from datetime import datetime",
        "from sharepoint_rest_client import SharePointRestClient",
        "from config import Config",
        "from config import Config"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "from datetime import datetime",
        "from sharepoint_rest_client import SharePointRestClient",
        "from config import Config"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 90,
      "line_start": 61,
      "name": "main_v52",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the entry point for testing SharePoint REST API integration. It validates the configuration, displays connection details, executes REST client tests, and reports success or failure. It's designed to verify that the SharePoint sync service can successfully connect and communicate with SharePoint before deployment.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 for successful test completion (all tests passed), 1 for test failure (either exception occurred or REST client test failed). This follows standard Unix exit code conventions where 0 indicates success.",
      "settings_required": [
        "config.py module with Config class containing SHAREPOINT_SITE_URL attribute",
        "config.py module with Config class containing SHAREPOINT_DOCUMENTS_PATH attribute",
        "Config.validate_config() method must be implemented and functional",
        "Config.setup_logging() method must be implemented and functional",
        "test_rest_client() function must be defined in the same module or imported",
        "SharePoint authentication credentials configured in Config class",
        "Network access to SharePoint site specified in configuration"
      ],
      "source_code": "def main():\n    \"\"\"Main test function.\"\"\"\n    print(\"SharePoint REST API Connection Test\")\n    print(\"=\" * 50)\n    print(f\"Test time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n    print()\n    \n    try:\n        # Load configuration\n        from config import Config\n        Config.validate_config()\n        Config.setup_logging()\n        \n        print(\"\u2705 Configuration loaded successfully\")\n        print(f\"SharePoint Site: {Config.SHAREPOINT_SITE_URL}\")\n        print(f\"Documents Path: {Config.SHAREPOINT_DOCUMENTS_PATH}\")\n        print()\n        \n        # Test REST client\n        if test_rest_client():\n            print(\"\\n\ud83c\udf89 SharePoint REST API test passed!\")\n            print(\"The sync service should work with this approach.\")\n            return 0\n        else:\n            print(\"\\n\u274c SharePoint REST API test failed.\")\n            return 1\n            \n    except Exception as e:\n        print(f\"\u274c Test failed with exception: {e}\")\n        return 1",
      "source_file": "/tf/active/vicechatdev/SPFCsync/test_rest_client.py",
      "tags": [
        "testing",
        "sharepoint",
        "rest-api",
        "integration-test",
        "configuration",
        "validation",
        "entry-point",
        "connectivity-test",
        "main-function"
      ],
      "updated_at": "2025-12-07T01:56:14.679734",
      "usage_example": "if __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point of a diagnostic script",
        "Ensure config.py is properly configured with SharePoint credentials before running",
        "The function depends on explore_site_structure() which must be defined elsewhere in the codebase",
        "Review the diagnostic output carefully to understand folder visibility issues",
        "Use the exit code for integration with shell scripts or CI/CD pipelines",
        "The function provides user-friendly output with emojis and formatting for better readability",
        "Consider checking multiple document libraries if folders are not found in the default location"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported inside try block for configuration validation",
          "import": "from config import Config",
          "optional": false
        },
        {
          "condition": "imported inside except block for error reporting",
          "import": "import traceback",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 09:10:44",
      "decorators": [],
      "dependencies": [
        "requests",
        "config",
        "sharepoint_graph_client",
        "traceback"
      ],
      "description": "A diagnostic function that explores SharePoint site structure to investigate why only 2 folders are visible when more are expected in the web interface.",
      "docstring": "Main diagnostic function.",
      "id": 205,
      "imports": [
        "import requests",
        "import json",
        "import os",
        "import sys",
        "from sharepoint_graph_client import SharePointGraphClient",
        "from config import Config",
        "from config import Config",
        "import traceback",
        "import traceback"
      ],
      "imports_required": [
        "from config import Config",
        "import traceback"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 288,
      "line_start": 248,
      "name": "main_v51",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for a SharePoint diagnostic tool. It validates configuration, explores the SharePoint site structure using the Graph API, and provides detailed feedback about potential reasons for folder visibility discrepancies. It helps troubleshoot issues where the SharePoint web interface shows more folders than are accessible via the API.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 if the diagnostic completed successfully, 1 if any errors occurred during execution. This follows standard Unix exit code conventions for command-line tools.",
      "settings_required": [
        "config.py module with Config class containing SHAREPOINT_SITE_URL and validate_config() method",
        "SharePoint authentication credentials configured in Config class",
        "explore_site_structure() function must be defined in the same module or imported",
        "SharePointGraphClient class must be available for SharePoint API access"
      ],
      "source_code": "def main():\n    \"\"\"Main diagnostic function.\"\"\"\n    print(\"SharePoint Structure Diagnostic\")\n    print(\"=\" * 60)\n    print(\"This diagnostic will explore why we're only seeing 2 folders\")\n    print(\"when the SharePoint web interface shows many more.\")\n    print()\n    \n    try:\n        # Load configuration\n        from config import Config\n        Config.validate_config()\n        \n        print(\"\u2705 Configuration loaded successfully\")\n        print(f\"SharePoint Site: {Config.SHAREPOINT_SITE_URL}\")\n        print()\n        \n        # Run comprehensive exploration\n        success = explore_site_structure()\n        \n        if success:\n            print(\"\\n\ud83d\udccb DIAGNOSTIC SUMMARY:\")\n            print(\"-\" * 30)\n            print(\"The diagnostic has explored multiple ways to access your SharePoint content.\")\n            print(\"If we still only see 2 folders, it could mean:\")\n            print(\"1. The other folders are in a different document library\")\n            print(\"2. There are permission restrictions on those folders\")\n            print(\"3. The folders might be in a different site or subsite\")\n            print(\"4. The web interface shows a filtered or aggregated view\")\n            print()\n            print(\"\ud83d\udca1 RECOMMENDATION:\")\n            print(\"Check if there are multiple document libraries in your SharePoint site,\")\n            print(\"or if the folders are organized differently than expected.\")\n            \n        return 0 if success else 1\n        \n    except Exception as e:\n        print(f\"\u274c Diagnostic failed with exception: {e}\")\n        import traceback\n        traceback.print_exc()\n        return 1",
      "source_file": "/tf/active/vicechatdev/SPFCsync/diagnostic_comprehensive.py",
      "tags": [
        "diagnostic",
        "sharepoint",
        "debugging",
        "folder-exploration",
        "configuration-validation",
        "graph-api",
        "troubleshooting",
        "cli-tool",
        "main-entry-point"
      ],
      "updated_at": "2025-12-07T01:56:14.678987",
      "usage_example": "if __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)"
    },
    {
      "best_practices": [
        "Ensure load_config() function is implemented before calling main()",
        "The SharePoint site URL must follow the exact format: https://{tenant}.sharepoint.com/sites/{sitename}",
        "User must have SharePoint administrator privileges to grant app permissions",
        "The function is designed for interactive terminal use and prints directly to stdout",
        "Should be called as the entry point of a script (if __name__ == '__main__')",
        "The generated permission XML grants Read access at site collection scope - modify if different permissions are needed",
        "After running this helper, users should execute test_connections.py to verify permissions were granted successfully"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:08:03",
      "decorators": [],
      "dependencies": [],
      "description": "Interactive CLI helper function that generates and displays instructions for granting SharePoint app permissions to an Azure AD application.",
      "docstring": "Generate the app permission grant URL.",
      "id": 196,
      "imports": [
        "import os"
      ],
      "imports_required": [
        "import os"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 85,
      "line_start": 24,
      "name": "main_v50",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a guided wizard to help administrators grant SharePoint permissions to an Azure AD app. It loads configuration, parses SharePoint site URLs, extracts tenant information, and provides step-by-step instructions with the necessary URLs and XML configuration for granting app-only permissions at the site collection level. It offers both the direct appinv.aspx method and an alternative Admin Center approach.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 1 if configuration loading fails, missing required settings, or URL parsing fails; implicitly returns None (0) on successful execution. The return value indicates whether the helper completed successfully.",
      "settings_required": [
        "Requires a load_config() function to be defined in the same module that returns a dictionary",
        "SHAREPOINT_SITE_URL configuration value in format 'https://{tenant}.sharepoint.com/sites/{sitename}'",
        "AZURE_CLIENT_ID configuration value containing the Azure AD application client ID"
      ],
      "source_code": "def main():\n    \"\"\"Generate the app permission grant URL.\"\"\"\n    print(\"SharePoint App Permission Grant Helper\")\n    print(\"=\" * 50)\n    \n    config = load_config()\n    if not config:\n        print(\"\u274c Could not load configuration\")\n        return 1\n    \n    site_url = config.get('SHAREPOINT_SITE_URL', '')\n    client_id = config.get('AZURE_CLIENT_ID', '')\n    \n    if not site_url or not client_id:\n        print(\"\u274c Missing SHAREPOINT_SITE_URL or AZURE_CLIENT_ID in configuration\")\n        return 1\n    \n    # Extract site components\n    if '.sharepoint.com/sites/' in site_url:\n        base_url = site_url.split('/sites/')[0]\n        site_name = site_url.split('/sites/')[-1]\n        tenant = base_url.split('https://')[-1].split('.sharepoint.com')[0]\n    else:\n        print(\"\u274c Cannot parse SharePoint site URL\")\n        return 1\n    \n    print(f\"Site URL: {site_url}\")\n    print(f\"Tenant: {tenant}\")\n    print(f\"Client ID: {client_id}\")\n    print()\n    \n    # Generate the app permission grant URL\n    app_grant_url = f\"{base_url}/_layouts/15/appinv.aspx\"\n    \n    print(\"\ud83d\udd17 SharePoint App Permission Grant\")\n    print(\"-\" * 30)\n    print(f\"1. Open this URL in your browser: {app_grant_url}\")\n    print()\n    print(\"2. Fill in the form with these values:\")\n    print(f\"   App Id: {client_id}\")\n    print(\"   App Domain: (leave blank)\")\n    print(\"   App Redirect URL: (leave blank)\")\n    print()\n    print(\"3. Click 'Generate' to auto-fill the Title and App Domain\")\n    print()\n    print(\"4. In the Permission Request XML field, paste this:\")\n    print()\n    print(\"\"\"<AppPermissionRequests AllowAppOnlyPolicy=\"true\">\n  <AppPermissionRequest Scope=\"http://sharepoint/content/sitecollection\" Right=\"Read\" />\n</AppPermissionRequests>\"\"\")\n    print()\n    print(\"5. 
Click 'Create' and then 'Trust It' when prompted\")\n    print()\n    print(\"\ud83d\udccb Alternative Method: Admin Center\")\n    print(\"-\" * 30)\n    print(\"If the above doesn't work, try this:\")\n    print(f\"1. Go to SharePoint Admin Center\")\n    print(f\"2. Navigate to More features \u2192 Apps \u2192 App Catalog\")\n    print(f\"3. Add your app with Client ID: {client_id}\")\n    print()\n    print(\"After granting permissions, test again with:\")\n    print(\"python test_connections.py\")",
      "source_file": "/tf/active/vicechatdev/SPFCsync/grant_sharepoint_access.py",
      "tags": [
        "sharepoint",
        "azure-ad",
        "permissions",
        "cli-helper",
        "configuration",
        "app-registration",
        "authentication",
        "admin-tool",
        "interactive",
        "setup-wizard"
      ],
      "updated_at": "2025-12-07T01:56:14.678203",
      "usage_example": "# Assuming load_config() function exists and returns proper config\n# Example config.py or .env should contain:\n# SHAREPOINT_SITE_URL=https://contoso.sharepoint.com/sites/mysite\n# AZURE_CLIENT_ID=12345678-1234-1234-1234-123456789abc\n\nif __name__ == '__main__':\n    exit_code = main()\n    if exit_code:\n        print('Failed to generate permission grant instructions')\n    else:\n        print('Instructions displayed successfully')"
    },
    {
      "best_practices": [
        "Run this function before attempting any SharePoint connections to ensure configuration is valid",
        "Use the return code to determine if setup is complete (0 = success, 1 = failure)",
        "Follow the printed next steps after successful validation",
        "Ensure all helper functions (load_env_file, validate_sharepoint_url, validate_azure_client_id, validate_azure_client_secret) are properly implemented",
        "The function expects validation functions to return tuples of (bool, str) where bool indicates validity and str contains the message",
        "This function is designed for interactive use with console output and should be run from command line",
        "Keep the .env file secure as it contains sensitive Azure credentials"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:07:32",
      "decorators": [],
      "dependencies": [],
      "description": "A validation function that checks SharePoint configuration settings from environment variables and provides diagnostic feedback on their validity.",
      "docstring": "Main validation function.",
      "id": 194,
      "imports": [
        "import os",
        "import sys",
        "import re",
        "from urllib.parse import urlparse"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "import re",
        "from urllib.parse import urlparse"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 128,
      "line_start": 79,
      "name": "main_v49",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a configuration validator for SharePoint integration. It loads environment variables from a .env file, validates the SharePoint URL, Azure Client ID, and Azure Client Secret, then provides comprehensive feedback on configuration status with actionable next steps. It's designed to be run as a standalone diagnostic tool before attempting SharePoint connections.",
      "return_annotation": null,
      "return_explained": "Returns an integer exit code: 0 if all validations pass (SharePoint URL, Azure Client ID, and Azure Client Secret are all valid), or 1 if any validation fails or if the .env file cannot be loaded. This follows standard Unix exit code conventions where 0 indicates success.",
      "settings_required": [
        ".env file in the working directory containing SHAREPOINT_SITE_URL, AZURE_CLIENT_ID, and AZURE_CLIENT_SECRET",
        "load_env_file() function must be defined in the same module",
        "validate_sharepoint_url() function must be defined in the same module",
        "validate_azure_client_id() function must be defined in the same module",
        "validate_azure_client_secret() function must be defined in the same module"
      ],
      "source_code": "def main():\n    \"\"\"Main validation function.\"\"\"\n    print(\"SharePoint Configuration Validator\")\n    print(\"=\" * 40)\n    \n    # Load environment variables\n    env_vars = load_env_file()\n    if env_vars is None:\n        return 1\n    \n    print(\"\u2705 .env file loaded successfully\")\n    print()\n    \n    # Validate SharePoint URL\n    sharepoint_url = env_vars.get('SHAREPOINT_SITE_URL', '')\n    url_valid, url_message = validate_sharepoint_url(sharepoint_url)\n    print(f\"SharePoint URL: {'\u2705' if url_valid else '\u274c'} {url_message}\")\n    if sharepoint_url and sharepoint_url != \"https://your-tenant.sharepoint.com/sites/your-site\":\n        print(f\"  Current value: {sharepoint_url}\")\n    \n    # Validate Azure Client ID\n    client_id = env_vars.get('AZURE_CLIENT_ID', '')\n    id_valid, id_message = validate_azure_client_id(client_id)\n    print(f\"Azure Client ID: {'\u2705' if id_valid else '\u274c'} {id_message}\")\n    \n    # Validate Azure Client Secret\n    client_secret = env_vars.get('AZURE_CLIENT_SECRET', '')\n    secret_valid, secret_message = validate_azure_client_secret(client_secret)\n    print(f\"Azure Client Secret: {'\u2705' if secret_valid else '\u274c'} {secret_message}\")\n    \n    print()\n    \n    # Overall status\n    all_valid = url_valid and id_valid and secret_valid\n    \n    if all_valid:\n        print(\"\ud83c\udf89 All SharePoint configuration looks good!\")\n        print()\n        print(\"Next steps:\")\n        print(\"1. Run connection test: python test_connections.py\")\n        print(\"2. If test passes, try one-time sync: python main.py --once\")\n        return 0\n    else:\n        print(\"\u274c Configuration issues found. Please fix the above issues.\")\n        print()\n        print(\"Setup help:\")\n        print(\"1. See SHAREPOINT_SETUP.md for detailed instructions\")\n        print(\"2. Update your .env file with correct values\")\n        print(\"3. 
Run this validator again\")\n        return 1",
      "source_file": "/tf/active/vicechatdev/SPFCsync/validate_config.py",
      "tags": [
        "validation",
        "configuration",
        "sharepoint",
        "azure",
        "environment-variables",
        "diagnostic",
        "setup",
        "cli",
        "configuration-checker"
      ],
      "updated_at": "2025-12-07T01:56:14.677511",
      "usage_example": "# Assuming all required validation functions are defined in the same module\n# and a .env file exists with SharePoint configuration\n\nif __name__ == '__main__':\n    exit_code = main()\n    sys.exit(exit_code)\n\n# Or simply call directly:\n# main()\n\n# Expected .env file format:\n# SHAREPOINT_SITE_URL=https://your-tenant.sharepoint.com/sites/your-site\n# AZURE_CLIENT_ID=your-client-id-guid\n# AZURE_CLIENT_SECRET=your-client-secret"
    },
    {
      "best_practices": [
        "This function should be called as the main entry point, typically within an 'if __name__ == \"__main__\":' block",
        "Ensure all helper functions (analyze_logs, print_status, tail_logs, watch_logs) are defined before calling main()",
        "The log file path should be accessible and readable by the process",
        "Default command is 'status' if no command is specified",
        "Use appropriate error handling in helper functions as main() does not catch exceptions",
        "Consider adding signal handlers for graceful shutdown when using the 'watch' command",
        "The function relies on argparse's built-in help system; users can run with --help for usage information"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 09:03:48",
      "decorators": [],
      "dependencies": [
        "argparse",
        "os",
        "sys",
        "time",
        "datetime",
        "re"
      ],
      "description": "Command-line interface entry point for monitoring SharePoint to FileCloud synchronization logs, providing status analysis, log tailing, and real-time watching capabilities.",
      "docstring": "Main entry point.",
      "id": 183,
      "imports": [
        "import os",
        "import sys",
        "import argparse",
        "import time",
        "from datetime import datetime",
        "from datetime import timedelta",
        "import re"
      ],
      "imports_required": [
        "import os",
        "import sys",
        "import argparse",
        "import time",
        "from datetime import datetime",
        "from datetime import timedelta",
        "import re"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 247,
      "line_start": 202,
      "name": "main_v48",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for a CLI tool that monitors and analyzes SharePoint to FileCloud sync operations. It parses command-line arguments to provide three main functionalities: displaying sync status with statistics over a specified time period, showing recent log entries (tail), and watching the log file in real-time. The function delegates to helper functions (analyze_logs, print_status, tail_logs, watch_logs) based on the selected command.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects by printing output to stdout based on the selected command and may exit the program if argument parsing fails.",
      "settings_required": [
        "Log file must exist at the specified path (default: 'spfc_sync.log')",
        "Helper functions must be defined in the same module: analyze_logs(log_file, hours), print_status(stats), tail_logs(log_file, lines), watch_logs(log_file)",
        "Log file should contain SharePoint to FileCloud sync operation logs in a parseable format"
      ],
      "source_code": "def main():\n    \"\"\"Main entry point.\"\"\"\n    parser = argparse.ArgumentParser(description=\"Monitor SharePoint to FileCloud Sync\")\n    \n    parser.add_argument(\n        '--log-file',\n        default='spfc_sync.log',\n        help='Path to log file (default: spfc_sync.log)'\n    )\n    \n    parser.add_argument(\n        '--hours',\n        type=int,\n        default=24,\n        help='Hours of history to analyze (default: 24)'\n    )\n    \n    subparsers = parser.add_subparsers(dest='command', help='Commands')\n    \n    # Status command\n    status_parser = subparsers.add_parser('status', help='Show sync status')\n    \n    # Tail command\n    tail_parser = subparsers.add_parser('tail', help='Show recent log entries')\n    tail_parser.add_argument(\n        '--lines',\n        type=int,\n        default=50,\n        help='Number of lines to show (default: 50)'\n    )\n    \n    # Watch command\n    watch_parser = subparsers.add_parser('watch', help='Watch log file in real-time')\n    \n    args = parser.parse_args()\n    \n    if not args.command:\n        args.command = 'status'  # Default command\n    \n    if args.command == 'status':\n        stats = analyze_logs(args.log_file, args.hours)\n        print_status(stats)\n    elif args.command == 'tail':\n        tail_logs(args.log_file, args.lines)\n    elif args.command == 'watch':\n        watch_logs(args.log_file)",
      "source_file": "/tf/active/vicechatdev/SPFCsync/monitor.py",
      "tags": [
        "cli",
        "command-line-interface",
        "log-monitoring",
        "sharepoint",
        "filecloud",
        "sync-monitoring",
        "argparse",
        "log-analysis",
        "real-time-monitoring",
        "entry-point"
      ],
      "updated_at": "2025-12-07T01:56:14.676825",
      "usage_example": "# Run from command line:\n# Show status for last 24 hours (default)\npython script.py status\n\n# Show status for last 48 hours with custom log file\npython script.py --log-file /path/to/sync.log --hours 48 status\n\n# Show last 100 log lines\npython script.py tail --lines 100\n\n# Watch log file in real-time\npython script.py watch\n\n# If calling from Python code:\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "This function should be called as the entry point of a debug/test script, typically within an 'if __name__ == \"__main__\":' block",
        "Ensure Chroma DB server is running before executing this function to avoid connection errors",
        "The function depends on 'test_chroma_collections()' and 'test_collection_creation()' being defined in the same module",
        "Review the printed troubleshooting suggestions if tests fail",
        "This function is designed for development/debugging purposes and should not be used in production code",
        "Consider wrapping the test function calls in try-except blocks for better error handling",
        "The function mentions a specific collection '99_EDR' which suggests it's part of a larger application context"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 08:47:48",
      "decorators": [],
      "dependencies": [
        "chromadb"
      ],
      "description": "Entry point function that executes a comprehensive test suite for Chroma DB collections, including collection listing and creation tests, followed by troubleshooting suggestions.",
      "docstring": "Main function to run all Chroma DB tests.",
      "id": 142,
      "imports": [
        "import chromadb",
        "import sys",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any"
      ],
      "imports_required": [
        "import chromadb"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 204,
      "line_start": 186,
      "name": "main_v47",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main orchestrator for debugging and testing Chroma DB functionality. It runs multiple test functions to verify Chroma DB connectivity, collection operations, and provides diagnostic output with troubleshooting steps. Primarily used for development, debugging, and validation of Chroma DB setup.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It produces side effects by printing test results and troubleshooting information to stdout.",
      "settings_required": [
        "Chroma DB server must be running and accessible",
        "Correct Chroma DB host and port configuration (typically localhost:8000 or as configured in the application)",
        "Network connectivity to Chroma DB server if running remotely",
        "Appropriate permissions to create and list collections in Chroma DB",
        "The functions 'test_chroma_collections()' and 'test_collection_creation()' must be defined in the same module or imported"
      ],
      "source_code": "def main():\n    \"\"\"Main function to run all Chroma DB tests.\"\"\"\n    \n    print(\"Chroma DB Collections Debug Script\")\n    print(\"==================================\")\n    \n    # Test collections\n    test_chroma_collections()\n    \n    # Test collection creation\n    test_collection_creation()\n    \n    print(f\"\\n\ud83d\udccb TROUBLESHOOTING SUGGESTIONS:\")\n    print(\"1. Check if Chroma DB server is running\")\n    print(\"2. Verify the correct host and port\")\n    print(\"3. Check if 99_EDR collection was created with a different name\")\n    print(\"4. Verify Chroma DB version compatibility\")\n    print(\"5. Check Docker container status if using Docker\")\n    print(\"6. Review Chroma DB logs for any errors\")",
      "source_file": "/tf/active/vicechatdev/test_chroma_collections.py",
      "tags": [
        "testing",
        "debugging",
        "chroma-db",
        "database",
        "collections",
        "diagnostics",
        "troubleshooting",
        "entry-point",
        "main-function",
        "vector-database"
      ],
      "updated_at": "2025-12-07T01:56:14.676119",
      "usage_example": "# Ensure Chroma DB server is running\n# Ensure test_chroma_collections() and test_collection_creation() are defined\n\nif __name__ == '__main__':\n    main()\n\n# Expected output:\n# Chroma DB Collections Debug Script\n# ==================================\n# [Output from test_chroma_collections()]\n# [Output from test_collection_creation()]\n# \n# \ud83d\udccb TROUBLESHOOTING SUGGESTIONS:\n# 1. Check if Chroma DB server is running\n# 2. Verify the correct host and port\n# 3. Check if 99_EDR collection was created with a different name\n# 4. Verify Chroma DB version compatibility\n# 5. Check Docker container status if using Docker\n# 6. Review Chroma DB logs for any errors"
    },
    {
      "best_practices": [
        "This function is designed as a CLI entry point and should be called from __main__ block",
        "The function handles all exceptions at the top level and provides user-friendly error messages",
        "User input is normalized (stripped and lowercased) for robust confirmation handling",
        "The two-phase approach (analyze then execute) prevents accidental data modifications",
        "Always check the return value to determine if the operation succeeded",
        "Ensure PylontechMover class is properly implemented with analyze_before_move() and execute_move() methods",
        "The function assumes specific folder and document names ('pylontech', 'Myfolder', 'Otherfolder') - modify PylontechMover for different use cases",
        "Console output uses emoji which may not render correctly in all terminal environments"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:53:20",
      "decorators": [],
      "dependencies": [
        "json",
        "time",
        "hashlib",
        "uuid",
        "base64",
        "zlib",
        "re",
        "pathlib",
        "crc32c"
      ],
      "description": "Interactive CLI function that orchestrates the movement of a 'pylontech' document from 'Myfolder' to 'Otherfolder' on a reMarkable device, with user confirmation before execution.",
      "docstring": "Main function - analyze first, then optionally execute",
      "id": 2114,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "import re",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "imports_required": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "import re",
        "from pathlib import Path",
        "import crc32c"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 838,
      "line_start": 799,
      "name": "main_v27",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This is the main entry point for a reMarkable document management script. It creates a PylontechMover instance, analyzes the proposed document move operation, displays the analysis results to the user, prompts for confirmation, and executes the move if approved. The function provides comprehensive console feedback with emoji indicators for different stages and outcomes.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the operation completed successfully (either the move succeeded or the user cancelled), False if analysis failed, move operation failed, or an exception occurred during initialization.",
      "settings_required": [
        "PylontechMover class must be defined and available in the same module or imported",
        "RemarkableAuth module must be available for authentication with reMarkable device",
        "Valid reMarkable device credentials/authentication configured",
        "Network connectivity to reMarkable cloud services",
        "Existence of 'pylontech' document in 'Myfolder' on the reMarkable device",
        "Existence of 'Otherfolder' as a valid destination folder on the reMarkable device"
      ],
      "source_code": "def main():\n    \"\"\"Main function - analyze first, then optionally execute\"\"\"\n    try:\n        mover = PylontechMover()\n        \n        print(f\"\ud83e\uddea PYLONTECH DOCUMENT MOVE SCRIPT\")\n        print(\"=\" * 50)\n        print(\"This script will move the 'pylontech' document from 'Myfolder' to 'Otherfolder'\")\n        print(\"\")\n        \n        # First, analyze what we'll do\n        analysis_data = mover.analyze_before_move()\n        \n        if not analysis_data:\n            print(f\"\\n\u274c Analysis failed - cannot proceed\")\n            return False\n        \n        # Ask user if they want to proceed\n        print(f\"\\n\ud83e\udd14 READY TO EXECUTE\")\n        print(\"=\" * 20)\n        response = input(\"Do you want to proceed with the move operation? (y/N): \").strip().lower()\n        \n        if response in ['y', 'yes']:\n            print(f\"\\n\ud83d\ude80 Proceeding with move operation...\")\n            success = mover.execute_move(analysis_data)\n            \n            if success:\n                print(f\"\\n\u2705 Move completed successfully!\")\n                print(f\"\ud83d\udca1 Check your reMarkable device - the pylontech document should now be in Otherfolder\")\n            else:\n                print(f\"\\n\u274c Move operation failed\")\n            \n            return success\n        else:\n            print(f\"\\n\u23f9\ufe0f Move operation cancelled by user\")\n            return True\n        \n    except Exception as e:\n        print(f\"\u274c Script failed to initialize: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_move_pylontech_fixed.py",
      "tags": [
        "cli",
        "interactive",
        "document-management",
        "remarkable",
        "file-operations",
        "user-confirmation",
        "orchestration",
        "main-entry-point",
        "error-handling"
      ],
      "updated_at": "2025-12-07T01:53:20.094008",
      "usage_example": "if __name__ == '__main__':\n    # Run the main function to move pylontech document\n    success = main()\n    \n    # Exit with appropriate status code\n    import sys\n    sys.exit(0 if success else 1)"
    },
    {
      "best_practices": [
        "Always provide exactly one command-line argument (the document UUID) when running this script",
        "Ensure the DocumentToTrashMover class is properly defined and imported before calling this function",
        "Verify that authentication credentials for reMarkable API are configured before execution",
        "The function expects a valid UUID format for the document identifier",
        "Check the return value to determine if the operation succeeded",
        "This function is designed to be called as a script entry point (if __name__ == '__main__')",
        "Error handling is basic - consider wrapping calls in additional try-except blocks for production use"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported inside the function body, only when main() is called",
          "import": "import sys",
          "optional": false
        }
      ],
      "created_at": "2025-12-07 00:47:40",
      "decorators": [],
      "dependencies": [
        "json",
        "time",
        "hashlib",
        "uuid",
        "base64",
        "zlib",
        "pathlib",
        "sys",
        "crc32c"
      ],
      "description": "Command-line interface function that moves a single reMarkable document to trash by accepting a document UUID as a command-line argument.",
      "docstring": "Move a single document to trash via command line parameter",
      "id": 2100,
      "imports": [
        "import json",
        "import time",
        "import hashlib",
        "import uuid",
        "import base64",
        "import zlib",
        "from pathlib import Path",
        "from auth import RemarkableAuth",
        "import crc32c",
        "import sys"
      ],
      "imports_required": [
        "import sys",
        "from auth import RemarkableAuth",
        "import crc32c"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 478,
      "line_start": 446,
      "name": "main_v45",
      "parameters": [],
      "parameters_explained": {
        "none": "This function takes no direct parameters. It reads arguments from sys.argv where sys.argv[1] should contain the document UUID string to be moved to trash."
      },
      "parent_class": null,
      "purpose": "This function serves as the entry point for a command-line tool that moves reMarkable documents to trash. It validates command-line arguments, instantiates a DocumentToTrashMover object, executes the trash operation, and provides user-friendly console feedback with emoji indicators for success or failure states.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the document was successfully moved to trash, False if the operation failed due to incorrect arguments, initialization errors, or trash operation failure.",
      "settings_required": [
        "DocumentToTrashMover class must be available in the same module or imported",
        "RemarkableAuth module must be available with proper authentication credentials configured",
        "Valid reMarkable API credentials/tokens for authentication",
        "Network connectivity to reMarkable cloud services"
      ],
      "source_code": "def main():\n    \"\"\"Move a single document to trash via command line parameter\"\"\"\n    import sys\n    \n    if len(sys.argv) != 2:\n        print(\"Usage: python move_documents_to_trash.py <document_uuid>\")\n        print(\"Example: python move_documents_to_trash.py 206f5df3-07c2-4341-8afd-2b7362aefa91\")\n        return False\n    \n    document_uuid = sys.argv[1]\n    \n    try:\n        mover = DocumentToTrashMover()\n        \n        print(f\"\ud83d\uddd1\ufe0f Moving Document to Trash\")\n        print(f\"Document UUID: {document_uuid}\")\n        print(\"=\" * 60)\n        \n        success = mover.move_document_to_trash(document_uuid)\n        \n        if success:\n            print(f\"\\n\ud83c\udf89 SUCCESS!\")\n            print(f\"\u2705 Document {document_uuid[:8]}... moved to trash successfully!\")\n            print(f\"\ud83d\udcf1 Check your reMarkable device - document should now be in trash\")\n        else:\n            print(f\"\\n\u274c FAILED!\")\n            print(f\"\u274c Failed to move document {document_uuid[:8]}... to trash\")\n        \n        return success\n        \n    except Exception as e:\n        print(f\"\u274c Failed to initialize: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/move_documents_to_trash.py",
      "tags": [
        "command-line",
        "cli",
        "remarkable",
        "document-management",
        "trash",
        "uuid",
        "file-operations",
        "cloud-sync",
        "user-interface",
        "entry-point"
      ],
      "updated_at": "2025-12-07T01:53:20.042777",
      "usage_example": "# Run from command line:\n# python move_documents_to_trash.py 206f5df3-07c2-4341-8afd-2b7362aefa91\n\n# Or call programmatically:\nif __name__ == '__main__':\n    success = main()\n    if success:\n        print('Document moved successfully')\n    else:\n        print('Operation failed')"
    },
    {
      "best_practices": [
        "Ensure the create_signature_image function is properly defined before calling main()",
        "Configure logging before calling this function to see informational messages",
        "Verify write permissions exist in the target directory before execution",
        "The function uses os.path.dirname(__file__) which requires the script to be run as a file (not in interactive mode)",
        "The hardcoded list of names should be modified if different sample signatures are needed",
        "Consider making the names list a parameter if this function needs to be more flexible",
        "The function assumes create_signature_image handles errors internally; add error handling if needed"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "required by the create_signature_image function that this main() function calls",
          "import": "from PIL import Image",
          "optional": false
        },
        {
          "condition": "required by the create_signature_image function that this main() function calls",
          "import": "from PIL import ImageDraw",
          "optional": false
        },
        {
          "condition": "required by the create_signature_image function that this main() function calls",
          "import": "from PIL import ImageFont",
          "optional": false
        },
        {
          "condition": "required by the create_signature_image function that this main() function calls",
          "import": "import numpy as np",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 10:26:29",
      "decorators": [],
      "dependencies": [
        "os",
        "logging",
        "PIL",
        "numpy"
      ],
      "description": "Generates sample signature images (PNG files) for a predefined list of names and saves them to a 'signatures' directory.",
      "docstring": "Generate sample signatures for the example JSON data",
      "id": 421,
      "imports": [
        "import os",
        "import logging",
        "import sys",
        "import random",
        "from PIL import Image",
        "from PIL import ImageDraw",
        "from PIL import ImageFont",
        "import numpy as np"
      ],
      "imports_required": [
        "import os",
        "import logging"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 168,
      "line_start": 142,
      "name": "main_v44",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function automates the creation of signature image files for testing or demonstration purposes in an audit system. It creates a signatures directory if it doesn't exist, then generates PNG signature images for four predefined names (Jane Smith, John Doe, Emily Johnson, Michael Brown) representing different roles in an audit workflow (Author, Reviewer 1, Reviewer 2, Approver). The generated signatures can be used with a main application that supports signature inclusion.",
      "return_annotation": null,
      "return_explained": "This function returns None (implicitly). It performs side effects by creating a directory and generating PNG image files on the filesystem.",
      "settings_required": [
        "A logger object must be configured and available in the module scope",
        "A create_signature_image(name, output_path) function must be defined in the same module or imported",
        "Write permissions in the directory where the script is located to create the 'signatures' subdirectory"
      ],
      "source_code": "def main():\n    \"\"\"Generate sample signatures for the example JSON data\"\"\"\n    # Create signatures directory if it doesn't exist\n    signatures_dir = os.path.join(os.path.dirname(__file__), 'signatures')\n    if not os.path.exists(signatures_dir):\n        os.makedirs(signatures_dir)\n        logger.info(f\"Created signatures directory: {signatures_dir}\")\n    \n    # Sample names from the example audit data\n    names = [\n        \"Jane Smith\",       # Author\n        \"John Doe\",         # Reviewer 1\n        \"Emily Johnson\",    # Reviewer 2\n        \"Michael Brown\"     # Approver\n    ]\n    \n    # Generate signatures for each name\n    for name in names:\n        # Create a filename from the name (lower case, spaces to underscores)\n        filename = name.lower().replace(\" \", \"_\") + \".png\"\n        output_path = os.path.join(signatures_dir, filename)\n        \n        # Create the signature\n        create_signature_image(name, output_path)\n    \n    logger.info(f\"Generated {len(names)} sample signatures in {signatures_dir}\")\n    logger.info(\"You can now run main.py with include_signatures=True\")",
      "source_file": "/tf/active/vicechatdev/document_auditor/generate_sample_signatures.py",
      "tags": [
        "signature-generation",
        "image-processing",
        "file-creation",
        "audit-system",
        "setup-utility",
        "PIL",
        "directory-management",
        "sample-data"
      ],
      "updated_at": "2025-12-07T01:53:20.042024",
      "usage_example": "import os\nimport logging\nfrom PIL import Image, ImageDraw, ImageFont\nimport numpy as np\n\n# Setup logger\nlogger = logging.getLogger(__name__)\nlogging.basicConfig(level=logging.INFO)\n\n# Define the create_signature_image function (required dependency)\ndef create_signature_image(name, output_path):\n    img = Image.new('RGB', (300, 100), color='white')\n    draw = ImageDraw.Draw(img)\n    draw.text((10, 40), name, fill='black')\n    img.save(output_path)\n    logger.info(f\"Created signature: {output_path}\")\n\n# Call the main function\nmain()\n\n# Result: Creates 'signatures' directory with 4 PNG files:\n# - jane_smith.png\n# - john_doe.png\n# - emily_johnson.png\n# - michael_brown.png"
    },
    {
      "best_practices": [
        "Ensure all template files exist at the specified paths before running this function",
        "The function depends on test_template_with_data() which must be implemented separately to perform actual template validation",
        "Template paths are hardcoded to /tf/active/CDocs/templates/email/ - modify these if your directory structure differs",
        "Test data structures should match the actual data structures used in production for accurate validation",
        "The function prints detailed output to console - capture stdout if you need to log results",
        "Both review_data and approval_data dictionaries contain overlapping fields (doc_number, title, etc.) which should be consistent across all templates",
        "Consider parameterizing template paths and URLs for better reusability across different environments"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 07:57:08",
      "decorators": [],
      "dependencies": [
        "os",
        "re"
      ],
      "description": "A test function that validates email template rendering by testing multiple HTML email templates with sample data structures for document review and approval workflows.",
      "docstring": "Test both review and approval templates with their respective data structures.",
      "id": 3,
      "imports": [
        "import re",
        "import os"
      ],
      "imports_required": [
        "import os",
        "import re"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 113,
      "line_start": 45,
      "name": "main_v43",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a comprehensive test suite for email template placeholder resolution in a Controlled Document Management System (CDocs). It validates that review and approval notification templates correctly render with their respective data structures, ensuring all placeholders are properly resolved. The function tests four different email templates: review_requested, approval_requested, document_updated, and approval_completed, using appropriate test data for each scenario.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value (all_passed) indicating whether all template tests passed successfully. Returns True if all templates exist and render without issues, False if any template is missing or fails validation.",
      "settings_required": [
        "Template files must exist at specified paths: /tf/active/CDocs/templates/email/review_requested.html, /tf/active/CDocs/templates/email/approval_requested.html, /tf/active/CDocs/templates/email/document_updated.html, /tf/active/CDocs/templates/email/approval_completed.html",
        "A function named 'test_template_with_data' must be defined in the same module to perform actual template validation",
        "CDocs application must be accessible at https://cdocs.vicebio.com"
      ],
      "source_code": "def main():\n    \"\"\"Test both review and approval templates with their respective data structures.\"\"\"\n    print(\"Testing Email Template Placeholder Resolution...\")\n    \n    # Test data for review notifications (like the working example)\n    review_data = {\n        'doc_number': 'EXPENSE_NOTE_003',\n        'title': 'double clone testing custom doc number',\n        'doc_type': 'FORM',\n        'version_number': '0.2',\n        'due_date': '2025-07-07T23:59:59.999999',\n        'document_url': 'https://cdocs.vicebio.com/document/test-uid-123',\n        'app_name': 'Controlled Document Management System',\n        'app_url': 'https://cdocs.vicebio.com',\n        'current_year': 2025,\n        'sender_name': 'CDocs System',\n        'cdocs_app_url': 'https://cdocs.vicebio.com',\n        'message': 'You have been requested to review this document.',\n        'instructions': 'testing if templates work'\n    }\n    \n    # Test data for approval notifications (should now work with the fix)\n    approval_data = {\n        'doc_number': 'DOC-001',\n        'title': 'Test Document for Approval',\n        'doc_type': 'SOP',\n        'version_number': '1.0',\n        'due_date': 'June 30, 2025',\n        'status': 'PENDING_APPROVAL',\n        'document_url': 'https://cdocs.vicebio.com/document/test-uid-123',\n        'approval_url': 'https://cdocs.vicebio.com/approval/approval-uid-456',\n        'app_name': 'Controlled Document Management System',\n        'app_url': 'https://cdocs.vicebio.com',\n        'current_year': 2025,\n        'sender_name': 'CDocs System',\n        'cdocs_app_url': 'https://cdocs.vicebio.com',\n        'message': 'You have been requested to approve this document.',\n        'instructions': 'with updated templates'\n    }\n    \n    templates_to_test = [\n        ('/tf/active/CDocs/templates/email/review_requested.html', review_data, 'Review Requested'),\n        
('/tf/active/CDocs/templates/email/approval_requested.html', approval_data, 'Approval Requested'),\n        ('/tf/active/CDocs/templates/email/document_updated.html', approval_data, 'Document Updated'),\n        ('/tf/active/CDocs/templates/email/approval_completed.html', approval_data, 'Approval Completed'),\n    ]\n    \n    all_passed = True\n    \n    for template_path, test_data, template_name in templates_to_test:\n        if os.path.exists(template_path):\n            success = test_template_with_data(template_path, test_data, template_name)\n            if not success:\n                all_passed = False\n        else:\n            print(f\"\u274c {template_name}: Template file not found: {template_path}\")\n            all_passed = False\n    \n    print(\"\\n\" + \"=\"*60)\n    if all_passed:\n        print(\"\u2705 ALL TESTS PASSED - Templates should render correctly!\")\n        print(\"\\nKey findings:\")\n        print(\"- Review templates work because they get document data via 'details'\")\n        print(\"- Approval templates now work because we fixed the controller\")\n        print(\"- Both follow the same data structure with doc_number, title, etc.\")\n    else:\n        print(\"\u274c SOME TESTS FAILED - Check the issues above\")\n    \n    return all_passed",
      "source_file": "/tf/active/vicechatdev/test_comprehensive_templates.py",
      "tags": [
        "testing",
        "email-templates",
        "validation",
        "document-management",
        "template-rendering",
        "placeholder-resolution",
        "approval-workflow",
        "review-workflow",
        "CDocs",
        "notification-system"
      ],
      "updated_at": "2025-12-07T01:53:20.041271",
      "usage_example": "# Ensure test_template_with_data function is defined\nimport os\nimport re\n\ndef test_template_with_data(template_path, data, name):\n    # Implementation of template testing logic\n    with open(template_path, 'r') as f:\n        template_content = f.read()\n    # Validate placeholders are resolved\n    return True\n\n# Run the main test function\nif __name__ == '__main__':\n    result = main()\n    if result:\n        print('All email templates validated successfully')\n    else:\n        print('Some templates failed validation')"
    },
    {
      "best_practices": [
        "This function should be called within an if __name__ == '__main__': block to prevent execution when the module is imported",
        "Ensure ProjectVictoriaDisclosureGenerator is properly initialized with all required dependencies before calling this function",
        "Set up proper error handling in ProjectVictoriaDisclosureGenerator.run_complete_analysis() as this function relies on its return value",
        "Verify all required environment variables and configuration files are present before execution",
        "Consider adding command-line argument parsing if the function needs to support different execution modes or parameters",
        "The function provides user-friendly output but does not handle exceptions - ensure the generator class handles errors gracefully",
        "Review the generated output file path and ensure it's accessible to the user running the script"
      ],
      "class_interface": {},
      "complexity": "simple",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 08:50:25",
      "decorators": [],
      "dependencies": [
        "os",
        "re",
        "json",
        "pandas",
        "numpy",
        "typing",
        "datetime",
        "tiktoken",
        "chromadb",
        "langchain_openai",
        "langchain",
        "warnings",
        "fitz",
        "sentence_transformers",
        "openai",
        "traceback"
      ],
      "description": "Entry point function that orchestrates the Project Victoria disclosure analysis by initializing the generator, running the complete analysis, and displaying results with next steps.",
      "docstring": "Main function to run the Project Victoria disclosure analysis.",
      "id": 149,
      "imports": [
        "import os",
        "import re",
        "import json",
        "import pandas as pd",
        "import numpy as np",
        "from typing import List",
        "from typing import Dict",
        "from typing import Any",
        "from typing import Tuple",
        "from typing import Optional",
        "from datetime import datetime",
        "import tiktoken",
        "import chromadb",
        "from chromadb import Documents",
        "from chromadb import EmbeddingFunction",
        "from chromadb import Embeddings",
        "from langchain_openai import ChatOpenAI",
        "from langchain.prompts import PromptTemplate",
        "import warnings",
        "import fitz",
        "from sentence_transformers import CrossEncoder",
        "from openai import OpenAI",
        "import traceback"
      ],
      "imports_required": [
        "from typing import List, Dict, Any, Tuple, Optional",
        "import os",
        "import re",
        "import json",
        "import pandas as pd",
        "import numpy as np",
        "from datetime import datetime",
        "import tiktoken",
        "import chromadb",
        "from chromadb import Documents, EmbeddingFunction, Embeddings",
        "from langchain_openai import ChatOpenAI",
        "from langchain.prompts import PromptTemplate",
        "import warnings",
        "import fitz",
        "from sentence_transformers import CrossEncoder",
        "from openai import OpenAI",
        "import traceback"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 878,
      "line_start": 859,
      "name": "main_v42",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for the Project Victoria Disclosure Generator application. It provides a user-friendly command-line interface that initializes the disclosure generator, executes the complete analysis workflow, and provides feedback on success or failure along with actionable next steps for the user. It's designed to be called directly when running the script as a standalone application.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It produces side effects by printing output to the console and generating a disclosure report file through the ProjectVictoriaDisclosureGenerator instance.",
      "settings_required": [
        "ProjectVictoriaDisclosureGenerator class must be defined and available in the same module or imported",
        "OPENAI_API_KEY environment variable (required by ProjectVictoriaDisclosureGenerator)",
        "Source documents and data files required by ProjectVictoriaDisclosureGenerator",
        "Appropriate file system permissions for reading input files and writing output reports",
        "ChromaDB configuration and storage location (if used by the generator)",
        "Sufficient disk space for generated disclosure reports"
      ],
      "source_code": "def main():\n    \"\"\"Main function to run the Project Victoria disclosure analysis.\"\"\"\n    print(\"Project Victoria Disclosure Generator\")\n    print(\"====================================\")\n    \n    # Initialize generator\n    generator = ProjectVictoriaDisclosureGenerator()\n    \n    # Run complete analysis\n    output_file = generator.run_complete_analysis()\n    \n    if output_file:\n        print(f\"\\\\n\u2705 Disclosure report generated: {output_file}\")\n        print(\"\\\\nNext steps:\")\n        print(\"1. Review the generated disclosures\")\n        print(\"2. Verify accuracy against source documents\")\n        print(\"3. Add any additional information as needed\")\n        print(\"4. Format for final legal documentation\")\n    else:\n        print(\"\\\\n\u274c Analysis failed. Please check error messages above.\")",
      "source_file": "/tf/active/vicechatdev/project_victoria_disclosure_generator.py",
      "tags": [
        "main-entry-point",
        "cli",
        "disclosure-analysis",
        "legal-documentation",
        "project-victoria",
        "orchestration",
        "workflow",
        "report-generation",
        "command-line-interface"
      ],
      "updated_at": "2025-12-07T01:53:20.040159",
      "usage_example": "# Assuming ProjectVictoriaDisclosureGenerator is defined in the same module\n# and all required environment variables and data files are set up\n\nif __name__ == '__main__':\n    main()\n\n# Expected console output:\n# Project Victoria Disclosure Generator\n# ====================================\n# [Processing messages from generator]\n# \u2705 Disclosure report generated: disclosure_report_20240115_143022.docx\n# \n# Next steps:\n# 1. Review the generated disclosures\n# 2. Verify accuracy against source documents\n# 3. Add any additional information as needed\n# 4. Format for final legal documentation"
    },
    {
      "best_practices": [
        "The function uses hardcoded file paths which limits reusability. Consider accepting file paths as command-line arguments or configuration parameters.",
        "API key validation is performed at runtime. Ensure the OPENAI_API_KEY environment variable is set before execution.",
        "The function catches all exceptions generically. Consider more specific exception handling for different error types (file not found, API errors, etc.).",
        "The MeetingMinutesGenerator class must be defined elsewhere in the codebase with methods: load_transcript(), generate_meeting_minutes(), and save_minutes().",
        "This function is designed to be called as a script entry point (if __name__ == '__main__': main()).",
        "Console output is used for user feedback. Redirect or capture stdout if running in automated environments.",
        "The meeting title 'Development Team Meeting - June 18, 2025' is hardcoded. Consider making this configurable or deriving it from the transcript."
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 08:27:12",
      "decorators": [],
      "dependencies": [
        "os",
        "openai"
      ],
      "description": "Entry point function that orchestrates the process of loading a meeting transcript, generating structured meeting minutes using OpenAI's GPT-4o API, and saving the output to a file.",
      "docstring": "Main function to process transcript and generate meeting minutes.",
      "id": 85,
      "imports": [
        "import os",
        "import re",
        "from datetime import datetime",
        "from typing import List",
        "from typing import Dict",
        "from typing import Tuple",
        "import openai"
      ],
      "imports_required": [
        "import os",
        "import openai"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 173,
      "line_start": 139,
      "name": "main_v41",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main execution entry point for a meeting minutes generation application. It handles the complete workflow: validates environment configuration (OpenAI API key), initializes the MeetingMinutesGenerator class, loads a transcript from a hardcoded file path, generates formatted meeting minutes using GPT-4o, and saves the results to an output file. It includes error handling and user feedback through console messages.",
      "return_annotation": null,
      "return_explained": "Returns None. The function performs side effects (file I/O, console output) but does not return any value. Exits early with return if API key validation fails.",
      "settings_required": [
        "OPENAI_API_KEY environment variable must be set with a valid OpenAI API key",
        "MeetingMinutesGenerator class must be defined and available in the same module or imported",
        "Input transcript file must exist at path: /tf/active/leexi/leexi-20250618-transcript-development_team_meeting.md",
        "Write permissions required for output directory: /tf/active/"
      ],
      "source_code": "def main():\n    \"\"\"Main function to process transcript and generate meeting minutes.\"\"\"\n    \n    # Configuration\n    API_KEY = os.getenv('OPENAI_API_KEY')\n    if not API_KEY:\n        print(\"Error: OPENAI_API_KEY environment variable not set\")\n        print(\"Please set your OpenAI API key as an environment variable:\")\n        print(\"export OPENAI_API_KEY='your-api-key-here'\")\n        return\n    \n    # File paths\n    transcript_path = \"/tf/active/leexi/leexi-20250618-transcript-development_team_meeting.md\"\n    output_path = \"/tf/active/meeting_minutes_2025-06-18.md\"\n    \n    try:\n        # Initialize generator\n        generator = MeetingMinutesGenerator(API_KEY)\n        \n        # Load transcript\n        print(\"Loading transcript...\")\n        transcript = generator.load_transcript(transcript_path)\n        \n        # Generate meeting minutes\n        print(\"Generating meeting minutes with GPT-4o...\")\n        minutes = generator.generate_meeting_minutes(transcript, \"Development Team Meeting - June 18, 2025\")\n        \n        # Save results\n        generator.save_minutes(minutes, output_path)\n        \n        print(\"Meeting minutes generation completed successfully!\")\n        print(f\"Output saved to: {output_path}\")\n        \n    except Exception as e:\n        print(f\"Error: {e}\")",
      "source_file": "/tf/active/vicechatdev/meeting_minutes_generator.py",
      "tags": [
        "main-function",
        "entry-point",
        "meeting-minutes",
        "transcript-processing",
        "openai",
        "gpt-4o",
        "file-io",
        "orchestration",
        "workflow",
        "error-handling"
      ],
      "updated_at": "2025-12-07T01:53:20.039500",
      "usage_example": "# Set environment variable before running\n# export OPENAI_API_KEY='sk-your-api-key-here'\n\n# Ensure MeetingMinutesGenerator class is defined\n# Then call the function:\nif __name__ == '__main__':\n    main()\n\n# Expected console output:\n# Loading transcript...\n# Generating meeting minutes with GPT-4o...\n# Meeting minutes generation completed successfully!\n# Output saved to: /tf/active/meeting_minutes_2025-06-18.md"
    },
    {
      "best_practices": [
        "Ensure the RemarkableUploadTests class is properly implemented before calling this function",
        "Verify that authentication credentials are configured correctly to avoid API failures",
        "Check that sufficient disk space is available for saving analysis results and HTTP logs",
        "Run this function in a context where network access to Remarkable services is available",
        "Review the generated JSON files in the test_results directory for detailed analysis output",
        "The function uses exception handling to catch all errors, so check the return value to determine success",
        "Timestamped filenames prevent overwriting previous analysis results",
        "Raw HTTP logs can be large - monitor disk usage if running frequently"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-07 00:29:57",
      "decorators": [],
      "dependencies": [
        "pathlib",
        "json",
        "time",
        "requests",
        "reportlab",
        "typing"
      ],
      "description": "Orchestrates a comprehensive analysis of Remarkable cloud state and replica synchronization, capturing detailed HTTP logs and saving results to JSON files.",
      "docstring": "Run comprehensive cloud and replica analysis",
      "id": 2054,
      "imports": [
        "import os",
        "import json",
        "import time",
        "from pathlib import Path",
        "from typing import Dict",
        "from typing import Any",
        "import uuid",
        "import requests",
        "from auth import RemarkableAuth",
        "from upload_manager import RemarkableUploadManager",
        "from reportlab.pdfgen import canvas",
        "from reportlab.lib.pagesizes import letter",
        "from local_replica_v2 import RemarkableReplicaBuilder"
      ],
      "imports_required": [
        "import os",
        "import json",
        "import time",
        "from pathlib import Path",
        "from typing import Dict, Any",
        "import uuid",
        "import requests",
        "from auth import RemarkableAuth",
        "from upload_manager import RemarkableUploadManager",
        "from reportlab.pdfgen import canvas",
        "from reportlab.lib.pagesizes import letter",
        "from local_replica_v2 import RemarkableReplicaBuilder"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 1306,
      "line_start": 1275,
      "name": "main_v40",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main entry point for running a complete diagnostic analysis of Remarkable cloud services and replica sync processes. It initializes a test suite with raw logging enabled, executes comprehensive cloud analysis, saves the results to timestamped JSON files, and captures raw HTTP logs for detailed debugging. This is useful for troubleshooting sync issues, understanding cloud state, and analyzing API interactions with Remarkable services.",
      "return_annotation": null,
      "return_explained": "Returns a boolean value: True if the analysis completes successfully (including initialization, analysis execution, and file saving), False if any exception occurs during the process. The actual analysis results are saved to disk rather than returned directly.",
      "settings_required": [
        "RemarkableAuth module must be properly configured with authentication credentials",
        "Write permissions for creating 'test_results' directory in the script's parent directory",
        "Valid Remarkable cloud API credentials configured in the auth module",
        "RemarkableUploadTests class must be available and properly implemented",
        "Network connectivity to Remarkable cloud services"
      ],
      "source_code": "def main():\n    \"\"\"Run comprehensive cloud and replica analysis\"\"\"\n    try:\n        # Initialize test suite with raw logging enabled\n        test_suite = RemarkableUploadTests(enable_raw_logging=True)\n        \n        print(\"\\n\ufffd COMPREHENSIVE CLOUD & REPLICA ANALYSIS\")\n        print(\"This will analyze both cloud state and replica sync process\")\n        print(\"=\" * 60)\n        \n        # Run comprehensive analysis\n        analysis_results = test_suite.comprehensive_cloud_analysis()\n        \n        # Save analysis results\n        results_file = Path(__file__).parent / \"test_results\" / f\"comprehensive_analysis_{int(time.time())}.json\"\n        results_file.parent.mkdir(exist_ok=True)\n        \n        with open(results_file, 'w') as f:\n            json.dump(analysis_results, f, indent=2, default=str)\n        \n        print(f\"\\n\ud83d\udcbe Analysis results saved to: {results_file}\")\n        \n        # Save raw HTTP logs for detailed analysis\n        log_file = test_suite.save_raw_logs()\n        if log_file:\n            print(f\"\ud83d\udd0d Raw HTTP logs captured for detailed request analysis\")\n        \n        return True\n        \n    except Exception as e:\n        print(f\"\u274c Analysis failed to initialize: {e}\")\n        return False",
      "source_file": "/tf/active/vicechatdev/e-ink-llm/cloudtest/test_uploads.py",
      "tags": [
        "remarkable",
        "cloud-analysis",
        "replica-sync",
        "diagnostic",
        "testing",
        "http-logging",
        "file-output",
        "orchestration",
        "main-entry-point",
        "error-handling"
      ],
      "updated_at": "2025-12-07T01:53:20.038824",
      "usage_example": "if __name__ == '__main__':\n    # Run the comprehensive analysis\n    success = main()\n    \n    if success:\n        print('Analysis completed successfully')\n        print('Check the test_results directory for output files')\n    else:\n        print('Analysis failed - check error messages above')\n        exit(1)"
    },
    {
      "best_practices": [
        "Ensure SOURCE_PATH and LOCAL_BASE_DIR are properly defined as module-level constants before calling this function",
        "The login_filecloud() function should handle credential management securely (avoid hardcoding credentials)",
        "Ensure sufficient disk space is available at LOCAL_BASE_DIR before running synchronization",
        "The function creates directories with parents=True and exist_ok=True, which is safe for repeated executions",
        "Consider wrapping the main() call in a try-except block to handle unexpected errors gracefully",
        "The function performs early exit if login fails, which is good practice for dependency validation",
        "Monitor console output for sync progress and completion status",
        "The hardcoded path mapping ('01_Research_UQ') suggests this is specific to a particular organizational structure"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 20:04:24",
      "decorators": [],
      "dependencies": [
        "requests",
        "xmltodict",
        "pathlib",
        "datetime",
        "zoneinfo"
      ],
      "description": "Main entry point function that orchestrates a file synchronization process from a FileCloud source to a local directory, with progress reporting and error handling.",
      "docstring": "Main execution",
      "id": 1716,
      "imports": [
        "import os",
        "import requests",
        "import xmltodict",
        "from pathlib import Path",
        "from datetime import datetime",
        "from zoneinfo import ZoneInfo"
      ],
      "imports_required": [
        "import os",
        "import requests",
        "import xmltodict",
        "from pathlib import Path",
        "from datetime import datetime",
        "from zoneinfo import ZoneInfo"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 195,
      "line_start": 170,
      "name": "main_v39",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as the main execution entry point for the UQchat FileCloud Sync application. It handles the complete workflow of: (1) displaying sync configuration information, (2) creating necessary local directories, (3) authenticating with FileCloud service, (4) initiating the directory synchronization process from a remote FileCloud path to a local filesystem location, and (5) reporting completion status. The function is designed to sync files from a specific FileCloud shared directory structure to a local base directory.",
      "return_annotation": null,
      "return_explained": "Returns None implicitly. The function performs side effects (file synchronization, console output) but does not return any value. Early return occurs if FileCloud login fails.",
      "settings_required": [
        "SOURCE_PATH global variable or constant must be defined (FileCloud source path)",
        "LOCAL_BASE_DIR global variable or constant must be defined (local destination directory as Path object)",
        "login_filecloud() function must be defined and available in the same module",
        "sync_directory() function must be defined and available in the same module",
        "FileCloud credentials must be configured for login_filecloud() function (likely environment variables or config file)",
        "Write permissions required for LOCAL_BASE_DIR location",
        "Network access to FileCloud service endpoint"
      ],
      "source_code": "def main():\n    \"\"\"Main execution\"\"\"\n    print(\"=\" * 80)\n    print(\"UQchat FileCloud Sync\")\n    print(\"=\" * 80)\n    print(f\"Source: {SOURCE_PATH}\")\n    print(f\"Destination: {LOCAL_BASE_DIR}\")\n    print(\"=\" * 80)\n    \n    # Create base directory if it doesn't exist\n    LOCAL_BASE_DIR.mkdir(parents=True, exist_ok=True)\n    \n    # Login to FileCloud\n    session = login_filecloud()\n    if not session:\n        print(\"Failed to login. Exiting.\")\n        return\n    \n    # Start syncing from the source path\n    # Map FileCloud path to local directory structure\n    # Remove the prefix '/SHARED/vicebio_shares/03_Non_Clinical/' from paths\n    sync_directory(session, SOURCE_PATH, LOCAL_BASE_DIR / '01_Research_UQ')\n    \n    print(\"\\n\" + \"=\" * 80)\n    print(\"\u2713 Sync complete!\")\n    print(\"=\" * 80)",
      "source_file": "/tf/active/vicechatdev/UQchat/download_uq_files.py",
      "tags": [
        "file-sync",
        "filecloud",
        "main-entry-point",
        "cloud-storage",
        "directory-sync",
        "file-management",
        "authentication",
        "console-output",
        "orchestration",
        "workflow"
      ],
      "updated_at": "2025-12-07T01:53:20.038114",
      "usage_example": "# Define required global variables and functions first\nfrom pathlib import Path\nimport requests\nimport xmltodict\n\nSOURCE_PATH = '/SHARED/vicebio_shares/03_Non_Clinical/'\nLOCAL_BASE_DIR = Path('./local_sync')\n\ndef login_filecloud():\n    # Implementation to authenticate with FileCloud\n    # Returns session object or None on failure\n    session = requests.Session()\n    # ... authentication logic ...\n    return session\n\ndef sync_directory(session, source, destination):\n    # Implementation to sync files from FileCloud to local\n    pass\n\n# Execute the main function\nif __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "Always ensure the vendor Excel file exists before running; the function will list available enriched files if the specified file is not found",
        "Use --test flag when first running to validate setup with only 3 vendors",
        "Use --max-mailboxes parameter to limit scope during development/testing",
        "The function expects extract_batch() to be defined in the same module scope",
        "Ensure all configuration constants are properly set in vendor_email_config.py before running",
        "The default vendor file path is hardcoded; consider using a more generic default or making it required",
        "Function calls sys.exit(1) on error, which terminates the entire Python process",
        "Command-line arguments override default values from vendor_email_config module"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "imported lazily inside the function, always needed when main() is called",
          "import": "import argparse",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 16:26:57",
      "decorators": [],
      "dependencies": [
        "argparse",
        "sys",
        "pandas",
        "pathlib",
        "typing",
        "vendor_email_extractor",
        "vendor_email_config"
      ],
      "description": "Command-line entry point that parses arguments and orchestrates the extraction of vendor emails from all vicebio.com mailboxes using Microsoft Graph API.",
      "docstring": "Main entry point",
      "id": 1264,
      "imports": [
        "import sys",
        "import pandas as pd",
        "from pathlib import Path",
        "from typing import List",
        "from typing import Optional",
        "from vendor_email_extractor import VendorEmailExtractor",
        "from vendor_email_config import TENANT_ID",
        "from vendor_email_config import CLIENT_ID",
        "from vendor_email_config import CLIENT_SECRET",
        "from vendor_email_config import OPENAI_API_KEY",
        "from vendor_email_config import DOMAIN",
        "from vendor_email_config import DEFAULT_DAYS_BACK",
        "from vendor_email_config import DEFAULT_MAX_EMAILS_PER_MAILBOX",
        "import argparse"
      ],
      "imports_required": [
        "import sys",
        "import pandas as pd",
        "from pathlib import Path",
        "from typing import List, Optional",
        "from vendor_email_extractor import VendorEmailExtractor",
        "from vendor_email_config import TENANT_ID, CLIENT_ID, CLIENT_SECRET, OPENAI_API_KEY, DOMAIN, DEFAULT_DAYS_BACK, DEFAULT_MAX_EMAILS_PER_MAILBOX"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 172,
      "line_start": 118,
      "name": "main_v38",
      "parameters": [],
      "parameters_explained": {
        "none": "This function takes no direct parameters. All configuration is handled through command-line arguments parsed internally using argparse"
      },
      "parent_class": null,
      "purpose": "This function serves as the main CLI interface for a vendor email extraction tool. It handles argument parsing for configuration options (vendor file path, mailbox limits, email limits, date ranges, test mode), validates the existence of the vendor Excel file, and delegates to the extract_batch function to perform the actual email extraction. It's designed to be called when running the script directly from the command line.",
      "return_annotation": null,
      "return_explained": "Returns None implicitly. The function either exits with sys.exit(1) if the vendor file is not found, or completes after calling extract_batch(). Side effects include printing error messages and executing the email extraction workflow.",
      "settings_required": [
        "vendor_email_config.py module with TENANT_ID, CLIENT_ID, CLIENT_SECRET, OPENAI_API_KEY, DOMAIN, DEFAULT_DAYS_BACK, DEFAULT_MAX_EMAILS_PER_MAILBOX constants",
        "vendor_email_extractor.py module with VendorEmailExtractor class",
        "extract_batch function must be defined in the same module or imported",
        "Excel file with vendor data (default: 'Vendors list_Vicebio_20112025_VB comments v1_enriched_20251121_225259.xlsx')",
        "Microsoft Azure AD app registration with appropriate Graph API permissions",
        "OpenAI API key for email processing"
      ],
      "source_code": "def main():\n    \"\"\"Main entry point\"\"\"\n    import argparse\n    \n    parser = argparse.ArgumentParser(\n        description=\"Extract vendor emails from all vicebio.com mailboxes\"\n    )\n    parser.add_argument(\n        \"--vendor-file\",\n        default=\"Vendors list_Vicebio_20112025_VB comments v1_enriched_20251121_225259.xlsx\",\n        help=\"Path to vendor Excel file\"\n    )\n    parser.add_argument(\n        \"--max-mailboxes\",\n        type=int,\n        default=None,\n        help=\"Limit number of mailboxes (for testing)\"\n    )\n    parser.add_argument(\n        \"--max-emails\",\n        type=int,\n        default=DEFAULT_MAX_EMAILS_PER_MAILBOX,\n        help=\"Max emails per mailbox per vendor\"\n    )\n    parser.add_argument(\n        \"--days-back\",\n        type=int,\n        default=DEFAULT_DAYS_BACK,\n        help=\"Days to search back\"\n    )\n    parser.add_argument(\n        \"--test\",\n        action=\"store_true\",\n        help=\"Test mode: only process first 3 vendors\"\n    )\n    \n    args = parser.parse_args()\n    \n    # Check if vendor file exists\n    vendor_file = Path(args.vendor_file)\n    if not vendor_file.exists():\n        print(f\"ERROR: Vendor file not found: {vendor_file}\")\n        print(\"\\nLooking for enriched vendor files in current directory:\")\n        for f in Path.cwd().glob(\"*enriched*.xlsx\"):\n            print(f\"  - {f.name}\")\n        sys.exit(1)\n    \n    # Run extraction\n    extract_batch(\n        vendor_excel_file=str(vendor_file),\n        max_mailboxes=args.max_mailboxes,\n        max_emails_per_mailbox=args.max_emails,\n        days_back=args.days_back,\n        test_mode=args.test\n    )",
      "source_file": "/tf/active/vicechatdev/find_email/extract_vendor_batch.py",
      "tags": [
        "cli",
        "command-line",
        "entry-point",
        "argparse",
        "email-extraction",
        "vendor-management",
        "microsoft-graph",
        "batch-processing",
        "file-validation",
        "configuration"
      ],
      "updated_at": "2025-12-07T01:53:20.037391",
      "usage_example": "# Run from command line:\n# python script.py --vendor-file vendors.xlsx --max-mailboxes 5 --max-emails 100 --days-back 30 --test\n\n# Or call directly in Python:\nif __name__ == '__main__':\n    main()\n\n# With custom vendor file:\n# python script.py --vendor-file /path/to/custom_vendors.xlsx\n\n# Test mode (first 3 vendors only):\n# python script.py --test\n\n# Limit mailboxes for testing:\n# python script.py --max-mailboxes 2 --max-emails 50"
    },
    {
      "best_practices": [
        "Ensure vendor_email_config.py exists before running this function, or it will exit early with an error message",
        "The function searches ALL mailboxes with no limits (max_mailboxes=None, max_emails_per_mailbox=None, days_back=None), which may take significant time and API quota for large organizations",
        "This is intended as a demonstration/testing function, not for production use - consider adding parameters for vendor name and search limits",
        "The function assumes VendorEmailExtractor class is defined in the same module",
        "Error handling is minimal - only catches ImportError for missing config file",
        "Results are printed to stdout rather than returned, making it unsuitable for programmatic use",
        "Consider wrapping the main logic in try-except blocks to handle API errors gracefully",
        "For production use, extract the core logic into a separate function that returns data rather than printing it"
      ],
      "class_interface": {},
      "complexity": "moderate",
      "component_type": "function",
      "conditional_imports": [
        {
          "condition": "Required configuration file vendor_email_config.py must exist with these constants defined",
          "import": "from vendor_email_config import TENANT_ID, CLIENT_ID, CLIENT_SECRET, OPENAI_API_KEY, DOMAIN",
          "optional": false
        }
      ],
      "created_at": "2025-12-06 16:25:55",
      "decorators": [],
      "dependencies": [
        "msal",
        "requests",
        "pandas",
        "openai",
        "pathlib"
      ],
      "description": "Demonstrates example usage of the VendorEmailExtractor class by searching for vendor emails across Office 365 mailboxes and displaying results.",
      "docstring": "Example usage",
      "id": 1261,
      "imports": [
        "import os",
        "import json",
        "import time",
        "import msal",
        "import requests",
        "import pandas as pd",
        "from typing import List",
        "from typing import Dict",
        "from typing import Optional",
        "from typing import Set",
        "from datetime import datetime",
        "from datetime import timedelta",
        "from pathlib import Path",
        "import re",
        "from openai import OpenAI",
        "from difflib import SequenceMatcher",
        "import unicodedata",
        "import re",
        "from difflib import SequenceMatcher",
        "from vendor_email_config import TENANT_ID",
        "from vendor_email_config import CLIENT_ID",
        "from vendor_email_config import CLIENT_SECRET",
        "from vendor_email_config import OPENAI_API_KEY",
        "from vendor_email_config import DOMAIN"
      ],
      "imports_required": [
        "from vendor_email_config import TENANT_ID, CLIENT_ID, CLIENT_SECRET, OPENAI_API_KEY, DOMAIN"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 1000,
      "line_start": 948,
      "name": "main_v37",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This function serves as a demonstration and testing entry point for the VendorEmailExtractor system. It loads configuration from a separate config file, initializes the extractor with Office 365 and OpenAI credentials, performs a comprehensive search for a specific vendor (Merck) across all mailboxes without limits, and displays formatted results including email counts, confidence scores, and mailbox distribution.",
      "return_annotation": null,
      "return_explained": "This function does not return any value (implicitly returns None). It performs side effects by printing results to stdout and potentially creating a VendorEmailExtractor instance that may cache data.",
      "settings_required": [
        "vendor_email_config.py file must exist in the same directory or Python path",
        "TENANT_ID: Azure AD tenant ID for Office 365 authentication",
        "CLIENT_ID: Azure AD application (client) ID with Microsoft Graph API permissions",
        "CLIENT_SECRET: Azure AD application client secret",
        "OPENAI_API_KEY: Valid OpenAI API key for email analysis",
        "DOMAIN: Email domain to search (e.g., 'vicebio.com')",
        "VendorEmailExtractor class must be defined and available in the same module",
        "Office 365 application must be registered with appropriate Microsoft Graph API permissions (Mail.Read, User.Read.All)"
      ],
      "source_code": "def main():\n    \"\"\"Example usage\"\"\"\n    # Load configuration\n    try:\n        from vendor_email_config import (\n            TENANT_ID, CLIENT_ID, CLIENT_SECRET, \n            OPENAI_API_KEY, DOMAIN\n        )\n    except ImportError:\n        print(\"ERROR: vendor_email_config.py not found!\")\n        print(\"Please create it from the O365_APP_SETUP_GUIDE.md instructions\")\n        return\n    \n    # Create extractor\n    extractor = VendorEmailExtractor(\n        tenant_id=TENANT_ID,\n        client_id=CLIENT_ID,\n        client_secret=CLIENT_SECRET,\n        openai_api_key=OPENAI_API_KEY,\n        domain=DOMAIN\n    )\n    \n    # Example: Extract for single vendor across ALL mailboxes\n    print(\"\\n=== Single Vendor Test - All Mailboxes ===\")\n    print(\"Searching all vicebio.com mailboxes for: Merck\")\n    print(\"No limits: All mailboxes, all emails, all time\\n\")\n    \n    df = extractor.extract_for_vendor(\n        vendor_name=\"Merck\",\n        max_mailboxes=None,  # Search ALL mailboxes\n        max_emails_per_mailbox=None,  # No email limit - get ALL matching emails\n        days_back=None  # No date limit\n    )\n    \n    print(f\"\\n{'='*60}\")\n    print(\"TEST COMPLETE\")\n    print(f\"{'='*60}\")\n    print(f\"Found {len(df)} total records\")\n    \n    if not df.empty:\n        print(f\"\\nUnique vendor emails: {df['vendor_email'].nunique()}\")\n        print(\"\\nTop results:\")\n        for email in df['vendor_email'].unique()[:10]:\n            count = len(df[df['vendor_email'] == email])\n            conf = df[df['vendor_email'] == email]['confidence'].mode()[0]\n            mailboxes = df[df['vendor_email'] == email]['found_in_mailbox'].nunique()\n            print(f\"  \u2022 {email}\")\n            print(f\"    Confidence: {conf}, Found in {count} emails across {mailboxes} mailboxes\")\n    else:\n        print(\"\\n\u26a0\ufe0f  No vendor emails found for Merck\")\n        print(\"This may be 
normal if Merck is not in email communications\")\n    \n    print(f\"\\n{'='*60}\\n\")",
      "source_file": "/tf/active/vicechatdev/find_email/vendor_email_extractor.py",
      "tags": [
        "example",
        "demo",
        "vendor-email-extraction",
        "office365",
        "microsoft-graph",
        "openai",
        "email-analysis",
        "entry-point",
        "testing",
        "configuration-loading"
      ],
      "updated_at": "2025-12-07T01:53:20.036700",
      "usage_example": "if __name__ == '__main__':\n    main()"
    },
    {
      "best_practices": [
        "Ensure the input CSV file 'data.csv' exists before calling this function",
        "Verify that column names in the dataset follow naming conventions that allow automatic identification of Eimeria and performance variables",
        "Ensure sufficient disk space and write permissions for output files",
        "Review console output for warnings about missing variable identification",
        "All helper functions (load_data, explore_data, etc.) must be properly defined in the same module",
        "Consider wrapping the function call in a try-except block to handle potential file I/O errors",
        "The function assumes a specific data structure and variable naming convention - review the identify_variables() function to understand expected column names",
        "Generated files will overwrite existing files with the same names in the current directory"
      ],
      "class_interface": {},
      "complexity": "complex",
      "component_type": "function",
      "conditional_imports": [],
      "created_at": "2025-12-06 13:19:07",
      "decorators": [],
      "dependencies": [
        "pandas",
        "numpy",
        "matplotlib",
        "seaborn",
        "scipy"
      ],
      "description": "Orchestrates a complete correlation analysis pipeline for Eimeria infection and broiler performance data, from data loading through visualization and results export.",
      "docstring": "Main execution function",
      "id": 822,
      "imports": [
        "import pandas as pd",
        "import numpy as np",
        "import matplotlib.pyplot as plt",
        "import seaborn as sns",
        "from scipy import stats",
        "from scipy.stats import pearsonr",
        "from scipy.stats import spearmanr",
        "import warnings"
      ],
      "imports_required": [
        "import pandas as pd",
        "import numpy as np",
        "import matplotlib.pyplot as plt",
        "import seaborn as sns",
        "from scipy import stats",
        "from scipy.stats import pearsonr",
        "from scipy.stats import spearmanr",
        "import warnings"
      ],
      "is_async": 0,
      "is_classmethod": 0,
      "is_property": 0,
      "is_staticmethod": 0,
      "line_end": 546,
      "line_start": 489,
      "name": "main_v36",
      "parameters": [],
      "parameters_explained": {},
      "parent_class": null,
      "purpose": "This is the main entry point function that coordinates a comprehensive statistical analysis workflow. It loads CSV data, identifies Eimeria infection variables and performance metrics, calculates correlations (both overall and grouped), generates visualizations (heatmaps, scatter plots), produces analytical conclusions, and exports results to multiple CSV and PNG files. Designed for veterinary/agricultural research analyzing the relationship between parasitic infection levels and poultry performance metrics.",
      "return_annotation": null,
      "return_explained": "Returns None. The function produces side effects including console output, generated visualization files (PNG), and exported data files (CSV). If Eimeria or performance variables cannot be identified in the dataset, the function prints a warning and returns early without generating outputs.",
      "settings_required": [
        "A file named 'data.csv' must exist in the current working directory containing the dataset to analyze",
        "The dataset should contain columns with names indicating Eimeria infection metrics and broiler performance variables",
        "Write permissions in the current directory for exporting CSV and PNG files",
        "The following helper functions must be defined in the same module: load_data(), explore_data(), identify_variables(), calculate_correlations(), grouped_correlation_analysis(), create_correlation_heatmap(), create_scatter_plots(), create_grouped_correlation_plot(), generate_conclusions(), export_results()"
      ],
      "source_code": "def main():\n    \"\"\"Main execution function\"\"\"\n    \n    print(\"=\"*80)\n    print(\"EIMERIA INFECTION AND BROILER PERFORMANCE CORRELATION ANALYSIS\")\n    print(\"=\"*80)\n    \n    # Load data\n    df = load_data('data.csv')\n    \n    # Explore data\n    categorical_vars, numerical_vars = explore_data(df)\n    \n    # Identify variables\n    eimeria_vars, performance_vars, grouping_vars = identify_variables(df, numerical_vars)\n    \n    if len(eimeria_vars) == 0 or len(performance_vars) == 0:\n        print(\"\\nWARNING: Could not identify Eimeria or performance variables.\")\n        print(\"Please ensure your dataset contains appropriate variable names.\")\n        return\n    \n    # Overall correlation analysis\n    overall_results = calculate_correlations(df, eimeria_vars, performance_vars)\n    \n    # Grouped correlation analysis\n    grouped_results = pd.DataFrame()\n    if len(grouping_vars) > 0:\n        grouped_results = grouped_correlation_analysis(df, eimeria_vars, \n                                                      performance_vars, grouping_vars)\n    \n    # Create visualizations\n    print(\"\\n\" + \"=\"*80)\n    print(\"CREATING VISUALIZATIONS\")\n    print(\"=\"*80)\n    \n    create_correlation_heatmap(df, eimeria_vars, performance_vars)\n    create_scatter_plots(df, eimeria_vars, performance_vars, grouping_vars)\n    \n    if len(grouped_results) > 0:\n        create_grouped_correlation_plot(grouped_results)\n    \n    # Generate conclusions\n    conclusions = generate_conclusions(overall_results, grouped_results, \n                                      eimeria_vars, performance_vars)\n    \n    # Export results\n    export_results(overall_results, grouped_results, conclusions)\n    \n    print(\"\\n\" + \"=\"*80)\n    print(\"ANALYSIS COMPLETE\")\n    print(\"=\"*80)\n    print(\"\\nGenerated files:\")\n    print(\"  - overall_correlations.csv\")\n    print(\"  - grouped_correlations.csv\")\n    print(\"  
- significant_correlations.csv\")\n    print(\"  - correlation_heatmap.png\")\n    print(\"  - scatter plots (multiple)\")\n    print(\"  - grouped_correlations.png\")",
      "source_file": "/tf/active/vicechatdev/vice_ai/smartstat_scripts/5a059cb7-3903-4020-8519-14198d1f39c9/analysis_1.py",
      "tags": [
        "data-analysis",
        "correlation",
        "statistics",
        "veterinary",
        "agriculture",
        "eimeria",
        "broiler",
        "poultry",
        "visualization",
        "pipeline",
        "orchestration",
        "main-function",
        "entry-point",
        "csv-processing",
        "heatmap",
        "scatter-plot",
        "pearson",
        "spearman"
      ],
      "updated_at": "2025-12-07T01:53:20.035949",
      "usage_example": "# Ensure 'data.csv' exists in the current directory with appropriate columns\n# The CSV should contain Eimeria-related columns (e.g., 'eimeria_count', 'oocyst_level')\n# and performance columns (e.g., 'weight_gain', 'feed_conversion_ratio')\n\n# Simply call the main function to run the entire analysis pipeline\nmain()\n\n# Expected output:\n# - Console output showing analysis progress\n# - overall_correlations.csv: correlation coefficients for all data\n# - grouped_correlations.csv: correlations by grouping variables\n# - significant_correlations.csv: statistically significant correlations only\n# - correlation_heatmap.png: visual heatmap of correlations\n# - Multiple scatter plot PNG files\n# - grouped_correlations.png: grouped correlation visualization"
    }
  ],
  "count": 100
}
