Skip to content
Open
41 changes: 41 additions & 0 deletions docs/source/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,44 @@ Example:
... print("Feed reader end")
...
>>> db.changes_feed(MyReader())


Pagination
----------

py-couchdb provides convenient pagination functionality for both CouchDB views and Mango queries. This eliminates the need to manually manage ``skip`` parameters and provides stable, cursor-based pagination.

View Pagination
~~~~~~~~~~~~~~~

Use ``view_pages()`` for paginating through CouchDB view results:

.. code-block:: python

>>> # Paginate through view results
>>> for page in db.view_pages("design/view", page_size=10):
... print(f"Page with {len(page)} rows")
... for row in page:
... print(f" {row['id']}: {row['key']}")

Mango Query Pagination
~~~~~~~~~~~~~~~~~~~~~~

Use ``mango_pages()`` for paginating through Mango query results:

.. code-block:: python

>>> # Paginate through Mango query results
>>> selector = {"type": "user", "active": True}
>>> for page in db.mango_pages(selector, page_size=10):
... print(f"Page with {len(page)} documents")
... for doc in page:
... print(f" {doc['_id']}: {doc['name']}")

Key Benefits
~~~~~~~~~~~~

- **Stable pagination**: No duplicate or missing results during concurrent updates
- **Automatic cursor management**: No manual ``skip`` parameter handling
- **Memory efficient**: Process large datasets page by page
- **Consistent API**: Same interface for both view and Mango pagination
81 changes: 79 additions & 2 deletions pycouchdb/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,17 @@
import copy
import mimetypes
import warnings
from typing import Any, Dict, List, Optional, Union, Iterator, Callable, TYPE_CHECKING
from typing import Any, Dict, List, Optional, Union, Iterator, Callable, TYPE_CHECKING, Tuple

from . import utils
from . import feedreader
from . import exceptions as exp
from .resource import Resource
from .types import (
Json, Document, Row, BulkItem, ServerInfo, DatabaseInfo,
Json, Document, Row, BulkItem, ServerInfo, DatabaseInfo,
ChangeResult, ViewResult, Credentials, AuthMethod, DocId, Rev
)
from .pagination import view_pages, mango_pages, ViewRows, MangoDocs, PageSize

# Type alias for feed reader parameter
FeedReader = Union[Callable[[Dict[str, Any]], None], feedreader.BaseFeedReader]
Expand Down Expand Up @@ -855,3 +856,79 @@ def changes_list(self, **kwargs):

(resp, result) = self.resource("_changes").get(params=kwargs)
return result['last_seq'], result['results']

def find(self, selector: Dict[str, Any], **kwargs: Any) -> Iterator[Document]:
    """
    Run a Mango query against the ``_find`` endpoint and yield its documents.

    This is a generator: the HTTP request is only sent once the returned
    iterator is advanced for the first time.

    :param selector: Mango query selector
    :param kwargs: Extra top-level fields of the query body (limit, bookmark, etc.)
    :returns: Iterator over the entries of the response's ``docs`` list;
              yields nothing when the response has no body or no ``docs`` key
    """
    # Build the request body from the keyword arguments plus the selector.
    body = copy.copy(kwargs)
    body['selector'] = selector

    payload = utils.force_bytes(json.dumps(body))
    (_, reply) = self.resource.post("_find", data=payload)

    if reply is not None and 'docs' in reply:
        for document in reply['docs']:
            yield document

def view_pages(self, design_and_view: str, page_size: PageSize, params: Optional[Dict[str, Any]] = None) -> Iterator[ViewRows]:
    """
    Paginate through CouchDB view results with automatic cursor management.

    This method only supplies the HTTP callback; the cursor handling
    (startkey / startkey_docid / skip) is done by the module-level
    ``view_pages`` function imported from ``.pagination``, so callers do not
    have to manage ``skip`` themselves.

    :param design_and_view: View name (e.g., "design/view")
    :param page_size: Number of rows per page
    :param params: Additional query parameters
    :returns: Iterator yielding lists of rows for each page

    .. versionadded:: 1.17
    """
    path = utils._path_from_name(design_and_view, '_view')

    def fetch_view(params_dict: Dict[str, Any]) -> Tuple[Any, Optional[Dict[str, Any]]]:
        # The pagination helper has already passed these options through
        # utils.encode_view_options() before calling this callback, so they
        # are forwarded untouched. Running the encoder a second time here
        # would encode the startkey cursor twice.
        # NOTE(review): if a future caller hands raw options to this
        # callback, the encoding step has to move back here.
        data = None
        if "keys" in params_dict:
            # A key list is sent in the POST body rather than the query string.
            data = utils.force_bytes(json.dumps({"keys": params_dict.pop('keys')}))

        view_resource = self.resource(*path)
        if data is not None:
            (resp, result) = view_resource.post(params=params_dict, data=data)
        else:
            (resp, result) = view_resource.get(params=params_dict)

        return resp, result

    return view_pages(fetch_view, design_and_view, page_size, params)

def mango_pages(self, selector: Dict[str, Any], page_size: PageSize, params: Optional[Dict[str, Any]] = None) -> Iterator[MangoDocs]:
    """
    Iterate over the pages of a Mango query, one list of documents at a time.

    The bookmark cursor is tracked by the module-level ``mango_pages``
    function imported from ``.pagination``; this method only provides the
    callback that POSTs each page's query body to ``_find``.

    :param selector: Mango query selector
    :param page_size: Number of documents per page
    :param params: Additional query parameters
    :returns: Iterator yielding lists of documents for each page

    .. versionadded:: 1.17
    """
    def fetch_mango(params_dict: Dict[str, Any]) -> Tuple[Any, Optional[Dict[str, Any]]]:
        # Serialize the whole query (selector, limit, bookmark, ...) as the body.
        payload = utils.force_bytes(json.dumps(params_dict))
        (http_response, parsed) = self.resource.post("_find", data=payload)
        return http_response, parsed

    return mango_pages(fetch_mango, selector, page_size, params)
152 changes: 152 additions & 0 deletions pycouchdb/pagination.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
# -*- coding: utf-8 -*-

"""
Pagination utilities for CouchDB views and Mango queries.

This module provides convenient pagination functionality for both CouchDB views
and Mango queries, handling the complexity of cursor-based pagination internally.
"""

from typing import Any, Dict, List, Iterator, Optional, Callable, Union, Tuple
import json
import copy

from . import utils
from .types import Row, Document, Json, ViewRows, MangoDocs, PageSize

__all__ = ['view_pages', 'mango_pages', 'ViewRows', 'MangoDocs', 'PageSize']


def view_pages(
    fetch: Callable[[Dict[str, Any]], Tuple[Any, Optional[Dict[str, Any]]]],
    view: str,
    page_size: PageSize,
    params: Optional[Dict[str, Any]] = None
) -> Iterator[ViewRows]:
    """
    Paginate through CouchDB view results using startkey/startkey_docid cursor.

    Every request asks for ``page_size + 1`` rows. The extra row is never
    yielded; its presence only signals that another page exists. The last
    row of a yielded page becomes the cursor of the next request
    (``startkey``, ``startkey_docid`` when the row has an ``id``, and
    ``skip=1`` so the cursor row itself is not returned again).

    .. warning::
        Pagination with grouped and reduced views (group=true, reduce=true) is
        inefficient and unreliable. Consider fetching all results at once for
        reduced views.

    :param fetch: Function that makes the actual HTTP request; it is called with
                  the encoded query parameters of one page and returns (response, result)
    :param view: View name (e.g., "design/view"); not used by the pagination logic itself
    :param page_size: Number of rows per page (at least 1)
    :param params: Additional query parameters. ``limit`` is always overridden;
                   ``startkey``, ``startkey_docid`` and ``skip`` are overridden
                   from the second page on
    :returns: Iterator yielding lists of rows for each page
    :raises ValueError: if ``page_size`` is smaller than 1 (raised when iteration starts)
    """
    # With page_size < 1 the "one extra row" scheme degenerates into empty
    # pages or an IndexError, so reject it up front.
    if page_size < 1:
        raise ValueError("page_size must be at least 1")

    # Work on a private copy so the caller's dict is never modified.
    query_params = copy.deepcopy(params) if params is not None else {}
    query_params['limit'] = page_size + 1  # One extra row to detect a following page

    # Last row of the previously yielded page, or None before the first request.
    # The whole row is kept instead of testing the key against None: a view
    # may emit ``null`` keys, and treating such a key as "no cursor yet"
    # would restart from the first page on every iteration.
    cursor_row = None

    while True:
        current_params = copy.deepcopy(query_params)

        if cursor_row is not None:
            current_params['startkey'] = cursor_row['key']
            # Rows without an 'id' (e.g. reduce output) can only be
            # positioned by key.
            cursor_docid = cursor_row.get('id')
            if cursor_docid is not None:
                current_params['startkey_docid'] = cursor_docid
            current_params['skip'] = 1  # Skip cursor row to avoid duplicates

        # Encode view parameters properly
        current_params = _encode_view_params(current_params)

        _, result = fetch(current_params)

        if result is None or 'rows' not in result:
            break

        rows = result['rows']

        # No extra row came back: this is the last page.
        if len(rows) <= page_size:
            if rows:  # Never yield an empty page
                yield rows
            break

        # More pages follow; yield this one without the extra row.
        yield rows[:page_size]
        cursor_row = rows[page_size - 1]
Copy link

Copilot AI Sep 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The comment is overly verbose and could be clearer. Consider simplifying to 'Skip cursor row to avoid duplicates'.

Suggested change
skip = 1 # Skip the row used as the cursor to avoid returning it again (prevents duplicate results in cursor-based pagination)
skip = 1 # Skip cursor row to avoid duplicates

Copilot uses AI. Check for mistakes.


def mango_pages(
    fetch_find: Callable[[Dict[str, Any]], Tuple[Any, Optional[Dict[str, Any]]]],
    selector: Dict[str, Any],
    page_size: PageSize,
    params: Optional[Dict[str, Any]] = None
) -> Iterator[MangoDocs]:
    """
    Paginate through Mango query results using bookmark cursor.

    Each request carries ``limit=page_size`` and, from the second page on,
    the ``bookmark`` returned with the previous page. Iteration ends when a
    response has no ``docs``, an empty ``docs`` list, fewer documents than
    ``page_size``, or no bookmark.

    :param fetch_find: Function that makes the actual HTTP request; it is called with
                       the query body of one page and returns (response, result)
    :param selector: Mango query selector
    :param page_size: Number of documents per page
    :param params: Additional query parameters; ``limit`` and ``selector`` are
                   always overridden, ``bookmark`` from the second page on
    :returns: Iterator yielding lists of documents for each page
    """
    # Work on a private copy so the caller's dict is never modified.
    query_params = copy.deepcopy(params) if params is not None else {}
    query_params['limit'] = page_size
    query_params['selector'] = selector

    bookmark = None

    while True:
        current_params = copy.deepcopy(query_params)

        if bookmark is not None:
            current_params['bookmark'] = bookmark

        _, result = fetch_find(current_params)

        if result is None or 'docs' not in result:
            break

        docs = result['docs']
        if not docs:
            break

        yield docs

        # A page shorter than the requested limit is the last one. A
        # bookmark alone does not prove that more results exist, so
        # stopping here saves a round trip that would only return an
        # empty page.
        if len(docs) < page_size:
            break

        bookmark = result.get('bookmark')
        if not bookmark:
            break


def _encode_view_params(params: Dict[str, Any]) -> Dict[str, Any]:
    """
    Return *params* as encoded by ``utils.encode_view_options``.

    Module-private indirection so the pagination code encodes view options
    through the same helper the client uses.

    :param params: View query parameters of a single request
    :returns: Whatever ``utils.encode_view_options`` returns for *params*
    """
    encoded = utils.encode_view_options(params)
    return encoded
5 changes: 5 additions & 0 deletions pycouchdb/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ def on_heartbeat(self) -> None: ...
DocId = str
Rev = str

# Pagination type aliases
# One page of view results, as yielded by the view pagination helpers.
ViewRows = List[Row]
# One page of Mango query results, as yielded by the Mango pagination helpers.
MangoDocs = List[Document]
# Number of items requested per page.
PageSize = int

# Constants
DEFAULT_BASE_URL: Final[str] = "http://localhost:5984/"
DEFAULT_AUTH_METHOD: Final[str] = "basic"
7 changes: 6 additions & 1 deletion pycouchdb/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,12 @@ def urljoin(base: str, *path: str) -> str:

def as_json(response: Any) -> Optional[Union[Dict[str, Any], List[Any], str]]:
if "application/json" in response.headers['content-type']:
response_src = response.content.decode('utf-8')
try:
response_src = response.content.decode('utf-8')
except UnicodeDecodeError:
# Try with error handling for invalid UTF-8
response_src = response.content.decode('utf-8', errors='replace')

if response.content != b'':
return json.loads(response_src)
else:
Expand Down
Loading