[llm unify 2/n] Implement llm_map(_elements) and move extract_entity to it. #1126

Merged: 49 commits merged into main from hml-llm-unify on Jan 30, 2025.
Changes shown below are from 43 of the 49 commits.

Commits
c2a8cfa add prompt base classes and ElementListPrompt (HenryL27, Jan 17, 2025)
21a115a override .instead in ElementListPrompt to store net-new keys in self.… (HenryL27, Jan 17, 2025)
f94da80 add ElementPrompt and StaticPrompt (HenryL27, Jan 17, 2025)
b73c162 add unit tests for prompts (HenryL27, Jan 21, 2025)
17b2163 forgot to commit this (HenryL27, Jan 21, 2025)
5d145d5 address pr comments; flatten properties with flatten_data (HenryL27, Jan 21, 2025)
7fa2ff1 support multiple user prompts (HenryL27, Jan 21, 2025)
abf9b0b rename instead to set (HenryL27, Jan 22, 2025)
9909c7e Merge branch 'main' of github.com:aryn-ai/sycamore into hml-llm-unify (HenryL27, Jan 22, 2025)
2d1315b add LLMMap and LLMMapElements transforms (HenryL27, Jan 22, 2025)
1853d51 Merge branch 'main' of github.com:aryn-ai/sycamore into hml-llm-unify (HenryL27, Jan 22, 2025)
5e86e56 move llm implementations to use RenderedPrompts (HenryL27, Jan 22, 2025)
27581ef also this guy (HenryL27, Jan 22, 2025)
739b672 add docset methods (HenryL27, Jan 23, 2025)
73d9bdd docstrings (HenryL27, Jan 23, 2025)
ed8785e add llm_map unit tests (HenryL27, Jan 23, 2025)
523d6e3 fix bedrock tests and caching (HenryL27, Jan 23, 2025)
e1b3206 fix anthropic and bedrock ITs (HenryL27, Jan 23, 2025)
6500e1c adjust caching to handle pydantic class response format properly (HenryL27, Jan 23, 2025)
f50032d fix base llm unit tests (HenryL27, Jan 23, 2025)
c3c7ea8 adjust all testing mock llms to updated llm interface (HenryL27, Jan 23, 2025)
ffaaf0f deprecate extract entity and implement it with llm_map (HenryL27, Jan 24, 2025)
d71cf1a add context_params decorator to llm_map (HenryL27, Jan 24, 2025)
4225e11 revert extract_entity docset method re-implementation (HenryL27, Jan 24, 2025)
0d39b27 add initial support for prompts that generate a sequence of rendered … (HenryL27, Jan 25, 2025)
0b5ded4 add stuff to EntityExtractor/OpenAIEntityExtractor to convert to LLMMap (HenryL27, Jan 25, 2025)
a52f7c2 make docset.extract_entity construct an LLMMap from its entity_extractor (HenryL27, Jan 25, 2025)
3a9ac3c get extract entity working with tokenizer and token limit (HenryL27, Jan 28, 2025)
befc3d0 get all extract_entity unit tests passing (HenryL27, Jan 28, 2025)
8bf42d5 fix llm_map_elements to deal with postprocess index (HenryL27, Jan 28, 2025)
d7ff1eb add postprocess_fn unit tests for llm_map (HenryL27, Jan 28, 2025)
a7a2cc0 ruff complaint (HenryL27, Jan 28, 2025)
ebf721e fix docset unittests (HenryL27, Jan 28, 2025)
0bd2a45 move a bunch of stuff back to llm.generate_old. This includes the act… (HenryL27, Jan 28, 2025)
95cbaaf move more stuff back to llm.generate_old (HenryL27, Jan 28, 2025)
ea7f0e6 fix the last few mocks (HenryL27, Jan 28, 2025)
2e51ee1 Merge branch 'main' of github.com:aryn-ai/sycamore into hml-llm-unify (HenryL27, Jan 28, 2025)
57a4e4b ruff linelength (HenryL27, Jan 28, 2025)
a312ba3 mypy!!! (HenryL27, Jan 28, 2025)
ebde879 type: ignore + line length is tricky (HenryL27, Jan 28, 2025)
ff5efdc fix generate_old with SimplePrompts (HenryL27, Jan 28, 2025)
370e2b7 set openai system role name to system instead of developer like their… (HenryL27, Jan 28, 2025)
98ce6a0 address simple pr comments (HenryL27, Jan 29, 2025)
1789409 pickle stuff in llm caching path bc not everything is jsonifiable (HenryL27, Jan 30, 2025; see the encode/decode sketch after this list)
8b6f085 rewrite llm_map to deal with iterative prompting better (HenryL27, Jan 30, 2025)
763acc5 add a b64encode-to-str to cache bc you can't put bytes in json either (HenryL27, Jan 30, 2025)
0331866 fix llm its to mimic the _llm_cache_set/get pickle/unpickle operations (HenryL27, Jan 30, 2025)
dfb7540 fix docstrings (HenryL27, Jan 30, 2025)
f7c06e7 oops bad type signature (HenryL27, Jan 30, 2025)
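
The two caching commits (1789409 and 763acc5) attack the same problem from both ends: cached LLM responses are pickled because not everything in them is JSON-serializable, and the pickled bytes are base64-encoded to a str because raw bytes cannot go in JSON either. A minimal sketch of that round trip, with illustrative helper names (the PR's actual code paths are _llm_cache_set/_llm_cache_get):

import base64
import pickle

def cache_encode(value) -> str:
    # pickle handles objects that json.dumps cannot; b64 turns the raw
    # pickle bytes into a str that can live inside a JSON cache record
    return base64.b64encode(pickle.dumps(value)).decode("utf-8")

def cache_decode(blob: str):
    # reverse the encoding: str -> bytes -> original value
    return pickle.loads(base64.b64decode(blob.encode("utf-8")))

assert cache_decode(cache_encode({"output": b"\x00not-json"})) == {"output": b"\x00not-json"}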
49 changes: 43 additions & 6 deletions lib/sycamore/sycamore/docset.py
@@ -8,7 +8,8 @@
from sycamore.context import Context, context_params, OperationTypes
from sycamore.data import Document, Element, MetadataDocument
from sycamore.functions.tokenizer import Tokenizer
from sycamore.llms.llms import LLM
from sycamore.llms.llms import LLM, LLMMode
from sycamore.llms.prompts import SycamorePrompt
from sycamore.llms.prompts.default_prompts import (
LlmClusterEntityAssignGroupsMessagesPrompt,
LlmClusterEntityFormGroupsMessagesPrompt,
@@ -29,6 +30,7 @@
from sycamore.transforms.extract_table import TableExtractor
from sycamore.transforms.merge_elements import ElementMerger
from sycamore.utils.extract_json import extract_json
from sycamore.utils.deprecate import deprecated
from sycamore.transforms.query import QueryExecutor, Query
from sycamore.materialize_config import MaterializeSourceMode

@@ -465,6 +467,7 @@ def extract_document_structure(self, structure: DocumentStructure, **kwargs):
document_structure = ExtractDocumentStructure(self.plan, structure=structure, **kwargs)
return DocSet(self.context, document_structure)

@deprecated(version="0.1.31", reason="Use llm_map instead")
Review comment (Contributor): Is the plan also to deprecate extract_properties?

Reply (HenryL27, Collaborator/Author): the plan is to deprecate just about everything on my sprint tasks
def extract_entity(self, entity_extractor: EntityExtractor, **kwargs) -> "DocSet":
"""
Applies the ExtractEntity transform on the Docset.
@@ -489,10 +492,8 @@ def extract_entity(self, entity_extractor: EntityExtractor, **kwargs) -> "DocSet
.extract_entity(entity_extractor=entity_extractor)

"""
from sycamore.transforms import ExtractEntity

entities = ExtractEntity(self.plan, context=self.context, entity_extractor=entity_extractor, **kwargs)
return DocSet(self.context, entities)
llm_map = entity_extractor.as_llm_map(self.plan, context=self.context, **kwargs)
return DocSet(self.context, llm_map)

def extract_schema(self, schema_extractor: SchemaExtractor, **kwargs) -> "DocSet":
"""
@@ -948,6 +949,42 @@ def custom_flat_mapping_function(document: Document) -> list[Document]:
flat_map = FlatMap(self.plan, f=f, **resource_args)
return DocSet(self.context, flat_map)

def llm_map(
self, prompt: SycamorePrompt, output_field: str, llm: LLM, llm_mode: LLMMode = LLMMode.SYNC, **kwargs
) -> "DocSet":
"""
Renders and runs a prompt on every Document of the DocSet.

Args:
prompt: The prompt to use. Must implement the ``render_document`` method
output_field: Field in properties to store the output.
llm: LLM to use for the inferences.
llm_mode: how to make the api calls to the llm - sync/async/batch
"""
from sycamore.transforms.base_llm import LLMMap

llm_map = LLMMap(self.plan, prompt=prompt, output_field=output_field, llm=llm, llm_mode=llm_mode, **kwargs)
return DocSet(self.context, llm_map)

def llm_map_elements(
self, prompt: SycamorePrompt, output_field: str, llm: LLM, llm_mode: LLMMode = LLMMode.SYNC, **kwargs
) -> "DocSet":
"""
Renders and runs a prompt on every Element of every Document in the DocSet.

Args:
prompt: The prompt to use. Must implement the ``render_element`` method
output_field: Field in properties to store the output.
llm: LLM to use for the inferences.
llm_mode: how to make the api calls to the llm - sync/async/batch
"""
from sycamore.transforms.base_llm import LLMMapElements

llm_map_elements = LLMMapElements(
self.plan, prompt=prompt, output_field=output_field, llm=llm, llm_mode=llm_mode, **kwargs
)
return DocSet(self.context, llm_map_elements)

def filter(self, f: Callable[[Document], bool], **kwargs) -> "DocSet":
"""
Applies the Filter transform on the Docset.
@@ -1356,7 +1393,7 @@ def llm_cluster_entity(self, llm: LLM, instruction: str, field: str, **kwargs) -
prompt_kwargs = {"messages": messages}

# call to LLM
completion = llm.generate(prompt_kwargs=prompt_kwargs, llm_kwargs={"temperature": 0})
completion = llm.generate_old(prompt_kwargs=prompt_kwargs, llm_kwargs={"temperature": 0})

groups = extract_json(completion)

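To show how the two new DocSet methods are meant to be called, a minimal usage sketch follows. The OpenAI wrapper and LLMMode come from import paths visible in this PR's diffs; the prompt object and field name are illustrative, since the diff only requires a SycamorePrompt implementing render_document (render_element for the per-element variant):

from sycamore.llms.llms import LLMMode
from sycamore.llms.openai import OpenAI, OpenAIModels

llm = OpenAI(OpenAIModels.GPT_3_5_TURBO.value)
prompt = my_title_prompt  # hypothetical: any SycamorePrompt with render_document

# given an existing DocSet `docset`: one LLM call per Document, with the
# response stored under properties["title"]
docset = docset.llm_map(prompt=prompt, output_field="title", llm=llm, llm_mode=LLMMode.SYNC)

# or one call per Element of each Document (prompt needs render_element)
docset = docset.llm_map_elements(prompt=prompt, output_field="title", llm=llm)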
6 changes: 3 additions & 3 deletions lib/sycamore/sycamore/evaluation/subtasks.py
@@ -5,7 +5,7 @@
from sycamore.docset import DocSet
from sycamore.llms.llms import LLM
from sycamore.llms.openai import OpenAI, OpenAIModels
from sycamore.llms.prompts.default_prompts import SimpleGuidancePrompt, TaskIdentifierZeroShotGuidancePrompt
from sycamore.llms.prompts.default_prompts import SimplePrompt, _TaskIdentifierZeroShotGuidancePrompt
from sycamore.transforms.embed import Embedder, SentenceTransformerEmbedder
from sycamore.transforms.query import QueryExecutor

@@ -22,7 +22,7 @@ def __init__(
model_name="sentence-transformers/all-MiniLM-L6-v2", batch_size=100
),
llm: LLM = OpenAI(OpenAIModels.GPT_3_5_TURBO.value),
prompt: SimpleGuidancePrompt = TaskIdentifierZeroShotGuidancePrompt(),
prompt: SimplePrompt = _TaskIdentifierZeroShotGuidancePrompt(),
knn_query: bool = False,
):
if subtask_data:
@@ -44,7 +44,7 @@ def _get_formulas(self, document: Document) -> list[Document]:
def _get_formulas(self, document: Document) -> list[Document]:
f_list = []
if document.properties["subtasks_reqd"]:
task_id = self._llm.generate(
task_id = self._llm.generate_old(
prompt_kwargs={
"prompt": self._prompt,
"question": document["question"],
69 changes: 47 additions & 22 deletions lib/sycamore/sycamore/llms/anthropic.py
@@ -6,7 +6,7 @@
from PIL import Image

from sycamore.llms.llms import LLM
from sycamore.llms.prompts.default_prompts import SimplePrompt
from sycamore.llms.prompts import RenderedPrompt
from sycamore.utils.cache import Cache
from sycamore.utils.image_utils import base64_data
from sycamore.utils.import_utils import requires_modules
@@ -49,29 +49,54 @@ def rewrite_system_messages(messages: Optional[list[dict]]) -> Optional[list[dic
return [m for m in messages if m.get("role") != "system"]


def get_generate_kwargs(prompt_kwargs: dict, llm_kwargs: Optional[dict] = None) -> dict:
def get_generate_kwargs(prompt: RenderedPrompt, llm_kwargs: Optional[dict] = None) -> dict:
kwargs = {
"temperature": 0,
**(llm_kwargs or {}),
}

kwargs["max_tokens"] = kwargs.get("max_tokens", DEFAULT_MAX_TOKENS)

if "prompt" in prompt_kwargs:
prompt = prompt_kwargs.get("prompt")

if isinstance(prompt, SimplePrompt):
kwargs.update({"messages": prompt.as_messages(prompt_kwargs)})
# Anthropic models require _exactly_ alternation between "user" and "assistant"
# roles, so we break the messages into groups of consecutive user/assistant
# messages, treating "system" as "user". Then crunch each group down to a single
# message to ensure alternation.
message_groups = [] # type: ignore
last_role = None

for m in prompt.messages:
r = m.role
if r == "system":
r = "user"
if r != last_role:
message_groups.append([])
message_groups[-1].append(m)
last_role = r

messages = []
for group in message_groups:
role = group[0].role
if role == "system":
role = "user"
content = "\n".join(m.content for m in group)
if any(m.images is not None for m in group):
images = [im for m in group for im in m.images]
contents = [{"type": "text", "text": content}]
for im in images:
contents.append(
{ # type: ignore
"type": "image",
"source": { # type: ignore
"type": "base64",
"media_type": "image/png",
"data": base64_data(im),
},
}
)
messages.append({"role": role, "content": contents})
else:
kwargs.update({"messages": [{"role": "user", "content": f"{prompt}"}]})

elif "messages" in prompt_kwargs:
kwargs.update({"messages": prompt_kwargs["messages"]})
else:
raise ValueError("Either prompt or messages must be present in prompt_kwargs.")

kwargs["messages"] = rewrite_system_messages(kwargs["messages"])
messages.append({"role": role, "content": content})

kwargs["messages"] = messages
return kwargs


@@ -128,12 +153,12 @@ def is_chat_mode(self) -> bool:
def format_image(self, image: Image.Image) -> dict[str, Any]:
return format_image(image)

def generate_metadata(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = None) -> dict:
ret = self._llm_cache_get(prompt_kwargs, llm_kwargs)
def generate_metadata(self, *, prompt: RenderedPrompt, llm_kwargs: Optional[dict] = None) -> dict:
ret = self._llm_cache_get(prompt, llm_kwargs)
if isinstance(ret, dict):
return ret

kwargs = get_generate_kwargs(prompt_kwargs, llm_kwargs)
kwargs = get_generate_kwargs(prompt, llm_kwargs)

start = datetime.now()

@@ -153,9 +178,9 @@ def generate_metadata(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] =
self.add_llm_metadata(kwargs, output, wall_latency, in_tokens, out_tokens)
logging.debug(f"Generated response from Anthropic model: {ret}")

self._llm_cache_set(prompt_kwargs, llm_kwargs, ret)
self._llm_cache_set(prompt, llm_kwargs, ret)
return ret

def generate(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = None) -> str:
d = self.generate_metadata(prompt_kwargs=prompt_kwargs, llm_kwargs=llm_kwargs)
def generate(self, *, prompt: RenderedPrompt, llm_kwargs: Optional[dict] = None) -> str:
d = self.generate_metadata(prompt=prompt, llm_kwargs=llm_kwargs)
return d["output"]
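
To make the role-alternation logic in get_generate_kwargs concrete, here is a standalone sketch of the same grouping rule, using plain dicts in place of Sycamore's RenderedMessage objects (an assumption for brevity; the image-handling branch is omitted):

def group_for_anthropic(messages: list[dict]) -> list[dict]:
    # Anthropic requires strict user/assistant alternation, so treat "system"
    # as "user" and coalesce runs of consecutive same-role messages into a
    # single message, joining their contents with newlines.
    groups: list[dict] = []
    last_role = None
    for m in messages:
        role = "user" if m["role"] == "system" else m["role"]
        if role != last_role:
            groups.append({"role": role, "parts": []})
        groups[-1]["parts"].append(m["content"])
        last_role = role
    return [{"role": g["role"], "content": "\n".join(g["parts"])} for g in groups]

msgs = [
    {"role": "system", "content": "You extract entities."},
    {"role": "user", "content": "Find the title in this text."},
    {"role": "assistant", "content": "Understood."},
    {"role": "user", "content": "Here is the next chunk."},
]
# the system message and the first user message collapse into one user turn,
# yielding strict [user, assistant, user] alternation
print(group_for_anthropic(msgs))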
13 changes: 7 additions & 6 deletions lib/sycamore/sycamore/llms/bedrock.py
@@ -9,6 +9,7 @@

from sycamore.llms.llms import LLM
from sycamore.llms.anthropic import format_image, get_generate_kwargs
from sycamore.llms.prompts.prompts import RenderedPrompt
from sycamore.utils.cache import Cache

DEFAULT_MAX_TOKENS = 1000
@@ -77,14 +78,14 @@ def format_image(self, image: Image.Image) -> dict[str, Any]:
return format_image(image)
raise NotImplementedError("Images not supported for non-Anthropic Bedrock models.")

def generate_metadata(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = None) -> dict:
ret = self._llm_cache_get(prompt_kwargs, llm_kwargs)
def generate_metadata(self, *, prompt: RenderedPrompt, llm_kwargs: Optional[dict] = None) -> dict:
ret = self._llm_cache_get(prompt, llm_kwargs)
if isinstance(ret, dict):
print(f"cache return {ret}")
return ret
assert ret is None

kwargs = get_generate_kwargs(prompt_kwargs, llm_kwargs)
kwargs = get_generate_kwargs(prompt, llm_kwargs)
if self._model_name.startswith("anthropic."):
anthropic_version = (
DEFAULT_ANTHROPIC_VERSION
@@ -115,9 +116,9 @@ def generate_metadata(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] =
"out_tokens": out_tokens,
}
self.add_llm_metadata(kwargs, output, wall_latency, in_tokens, out_tokens)
self._llm_cache_set(prompt_kwargs, llm_kwargs, ret)
self._llm_cache_set(prompt, llm_kwargs, ret)
return ret

def generate(self, *, prompt_kwargs: dict, llm_kwargs: Optional[dict] = None) -> str:
d = self.generate_metadata(prompt_kwargs=prompt_kwargs, llm_kwargs=llm_kwargs)
def generate(self, *, prompt: RenderedPrompt, llm_kwargs: Optional[dict] = None) -> str:
d = self.generate_metadata(prompt=prompt, llm_kwargs=llm_kwargs)
return d["output"]
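
Both backends now share the same shape: generate() accepts a RenderedPrompt and delegates to generate_metadata(), which owns caching and telemetry; legacy prompt_kwargs callers (llm_cluster_entity and subtasks.py above) go through generate_old instead. A rough sketch of the prompt types this implies, with field names assumed from their usage in the anthropic.py hunk (role, content, optional images), not a verbatim copy of the library's definitions:

from dataclasses import dataclass, field
from typing import Any, Optional

@dataclass
class RenderedMessage:
    # fields inferred from get_generate_kwargs above
    role: str  # "system" | "user" | "assistant"
    content: str
    images: Optional[list[Any]] = None

@dataclass
class RenderedPrompt:
    # a SycamorePrompt's render_document/render_element produces one of these,
    # which each LLM backend translates into its native API call
    messages: list[RenderedMessage] = field(default_factory=list)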