
Pull Main #1

Merged
merged 82 commits on Nov 3, 2024
Commits
09ae36a
doc(README) : add prerequisites
adriens Jan 31, 2024
70e3763
Update README.md
adriens Feb 1, 2024
e28f43f
Update README.md
adriens Feb 15, 2024
42f9f70
modified embeddings return annotation
N-Vlahovic Apr 5, 2024
dc9543b
fix 'content' check on messages in chat()
DSLstandard Apr 15, 2024
426b30e
Bump ruff from 0.3.4 to 0.4.1
dependabot[bot] Apr 19, 2024
d38fe12
Merge pull request #114 from N-Vlahovic/n-vlahovic/20240405-annotatio…
mxyng Apr 25, 2024
5b94a8d
Merge pull request #118 from DSLstandard/fix-content-check
mxyng Apr 25, 2024
029b080
Merge pull request #124 from ollama/dependabot/pip/ruff-0.4.1
dependabot[bot] Apr 25, 2024
e1b0600
Bump pillow from 10.2.0 to 10.3.0
dependabot[bot] Apr 25, 2024
30090c5
Add py.typed to mark the library as typed
davep Apr 25, 2024
e403d74
Merge pull request #110 from ollama/dependabot/pip/pillow-10.3.0
dependabot[bot] Apr 25, 2024
fed2b2e
Update README.md link still point legacy url
veinkr Apr 27, 2024
e5c4799
Update README.md
TitanStar73 Apr 28, 2024
30f762a
Bump pytest from 8.1.1 to 8.2.0
dependabot[bot] Apr 29, 2024
c4931b2
Ensure that py.typed gets packaged and distributed
davep Apr 30, 2024
2ac751f
fix: annotation in AsyncClient.embedding
jingfelix May 3, 2024
5cf83dc
Bump ruff from 0.4.1 to 0.4.3
dependabot[bot] May 6, 2024
85f5263
Merge pull request #45 from adriens/patch-1
mxyng May 8, 2024
8b91188
Merge pull request #148 from ollama/dependabot/pip/ruff-0.4.3
dependabot[bot] May 8, 2024
cc1fca0
Merge pull request #145 from jingfelix/fix/async-client-embed-annotation
mxyng May 8, 2024
2b66ade
Merge pull request #140 from ollama/dependabot/pip/pytest-8.2.0
dependabot[bot] May 8, 2024
a8cb34e
Merge pull request #138 from TitanStar73/patch-1
mxyng May 8, 2024
00eafed
Merge pull request #135 from veinkr/main
mxyng May 8, 2024
96214c0
remove old options
mxyng May 9, 2024
eaab477
add done reason to generate type responses
BruceMacD May 9, 2024
3b69774
Merge pull request #152 from ollama/mxyng/rm-options
mxyng May 10, 2024
309007d
add quantization to create requests
mxyng May 7, 2024
5dc857e
Merge pull request #150 from ollama/mxyng/quantization
mxyng May 10, 2024
cb81f52
Merge pull request #154 from ollama/done-reason
mxyng May 10, 2024
0f8c20a
Avoid side effects if chat message contains images
tillfalko May 18, 2024
0824044
---
dependabot[bot] May 20, 2024
4fb909d
Bump ruff from 0.4.3 to 0.4.7
dependabot[bot] May 31, 2024
b45848a
Bump pytest from 8.2.0 to 8.2.2
dependabot[bot] Jun 4, 2024
14d7f8c
Merge pull request #172 from ollama/dependabot/pip/ruff-0.4.7
dependabot[bot] Jun 5, 2024
d5316d0
Python API PS (#177)
royjhan Jun 5, 2024
74db547
Merge pull request #162 from ollama/dependabot/pip/pytest-asyncio-0.23.7
dependabot[bot] Jun 5, 2024
57c597b
Merge pull request #158 from tillfalko/main
mxyng Jun 5, 2024
04d102b
Merge pull request #178 from ollama/dependabot/pip/pytest-8.2.2
dependabot[bot] Jun 5, 2024
fa7bf7c
Revert explicit inclusion of py.typed
davep Jun 5, 2024
d25c4aa
Merge pull request #129 from davep/mark-as-typed
mxyng Jun 5, 2024
982d65f
Simple Example (#179)
royjhan Jun 18, 2024
ce56f27
Add type overloads to methods (#181)
royjhan Jun 19, 2024
1a15742
Update README.md
jmorganca Jun 22, 2024
49568d5
Bump pillow from 10.3.0 to 10.4.0
dependabot[bot] Jul 1, 2024
222c207
Bump ruff from 0.4.7 to 0.5.2
dependabot[bot] Jul 15, 2024
359c63d
integrate tool calls (#213)
joshyan1 Jul 17, 2024
2cb796c
Bump pytest-asyncio from 0.23.7 to 0.23.8
dependabot[bot] Jul 17, 2024
b0ea6d9
Support `api/embed` (#208)
royjhan Jul 18, 2024
33c4b61
add insert support to generate endpoint (#215)
royjhan Jul 18, 2024
a49c986
Merge pull request #203 from ollama/dependabot/pip/pillow-10.4.0
dependabot[bot] Jul 18, 2024
b498ab3
Merge pull request #211 from ollama/dependabot/pip/ruff-0.5.2
dependabot[bot] Jul 18, 2024
f62eb97
Merge pull request #217 from ollama/dependabot/pip/pytest-asyncio-0.23.8
dependabot[bot] Jul 18, 2024
6025c36
Bump pytest-httpserver from 1.0.10 to 1.0.12
dependabot[bot] Jul 22, 2024
8e3a8fe
Bump ruff from 0.5.2 to 0.5.5
dependabot[bot] Jul 25, 2024
ab7e600
Bump pytest from 8.2.2 to 8.3.2
dependabot[bot] Jul 25, 2024
60d292a
update to `llama3.1` (#237)
jmorganca Jul 29, 2024
ae2bdbf
Merge pull request #231 from ollama/dependabot/pip/pytest-8.3.2
dependabot[bot] Jul 29, 2024
d2da64f
Merge pull request #230 from ollama/dependabot/pip/ruff-0.5.5
dependabot[bot] Jul 29, 2024
cd4fbfc
Merge pull request #224 from ollama/dependabot/pip/pytest-httpserver-…
dependabot[bot] Jul 29, 2024
cbf0887
fix: update name of toolcallfunction parameter (#236)
zeelrupapara Jul 29, 2024
8b694bb
Update `Message` annotations to support tool calls (#227)
Shulyaka Jul 29, 2024
10d0ff2
Bump pytest-httpserver from 1.0.12 to 1.1.0
dependabot[bot] Aug 12, 2024
9e6726e
Bump pytest-asyncio from 0.23.8 to 0.24.0
dependabot[bot] Aug 22, 2024
dfdeb7c
Add URL path to client URL in in Client._parse_host() (#170)
bplunkert Aug 23, 2024
e220e46
Merge pull request #252 from ollama/dependabot/pip/pytest-httpserver-…
dependabot[bot] Aug 27, 2024
9f2832d
Merge pull request #260 from ollama/dependabot/pip/pytest-asyncio-0.24.0
dependabot[bot] Aug 27, 2024
9c34d81
Bump ruff from 0.5.5 to 0.6.2
dependabot[bot] Aug 27, 2024
981015c
Merge pull request #261 from ollama/dependabot/pip/ruff-0.6.2
dependabot[bot] Aug 27, 2024
d98f646
IPv6 support (#262)
jbinder Aug 27, 2024
5f51129
Bump ruff from 0.6.2 to 0.6.3
dependabot[bot] Aug 29, 2024
81edab1
_stream in async client raises RuntimeError processing HTTP errors (#…
Oneirag Aug 29, 2024
89e8b74
Merge pull request #267 from ollama/dependabot/pip/ruff-0.6.3
dependabot[bot] Aug 30, 2024
ccf8af3
Bump pytest from 8.3.2 to 8.3.3
dependabot[bot] Sep 10, 2024
eff2afd
update docs
pdevine Sep 12, 2024
a26537c
update docs
royjhan Jul 17, 2024
1ec88ed
Merge pull request #216 from ollama/royh/embed-docs
pdevine Sep 13, 2024
7fda5c9
Bump ruff from 0.6.3 to 0.6.5
dependabot[bot] Sep 13, 2024
61c8d0d
add basic delete/copy tests (#275)
mxyng Sep 18, 2024
f3e72b6
update pyproject.toml (#284)
mxyng Sep 18, 2024
9bcd0d6
Merge pull request #278 from ollama/dependabot/pip/pytest-8.3.3
dependabot[bot] Sep 19, 2024
ebe332b
Merge pull request #281 from ollama/dependabot/pip/ruff-0.6.5
dependabot[bot] Sep 19, 2024
46 changes: 29 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Ollama Python Library

- The Ollama Python library provides the easiest way to integrate Python 3.8+ projects with [Ollama](https://github.com/jmorganca/ollama).
+ The Ollama Python library provides the easiest way to integrate Python 3.8+ projects with [Ollama](https://github.com/ollama/ollama).

## Install

@@ -12,7 +12,7 @@ pip install ollama

```python
import ollama
- response = ollama.chat(model='llama2', messages=[
+ response = ollama.chat(model='llama3.1', messages=[
{
'role': 'user',
'content': 'Why is the sky blue?',
@@ -29,7 +29,7 @@ Response streaming can be enabled by setting `stream=True`, modifying function c
import ollama

stream = ollama.chat(
- model='llama2',
+ model='llama3.1',
messages=[{'role': 'user', 'content': 'Why is the sky blue?'}],
stream=True,
)
@@ -40,18 +40,18 @@ for chunk in stream:

## API

- The Ollama Python library's API is designed around the [Ollama REST API](https://github.com/jmorganca/ollama/blob/main/docs/api.md)
+ The Ollama Python library's API is designed around the [Ollama REST API](https://github.com/ollama/ollama/blob/main/docs/api.md)

### Chat

```python
- ollama.chat(model='llama2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
+ ollama.chat(model='llama3.1', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
```

### Generate

```python
- ollama.generate(model='llama2', prompt='Why is the sky blue?')
+ ollama.generate(model='llama3.1', prompt='Why is the sky blue?')
```

### List
@@ -63,14 +63,14 @@ ollama.list()
### Show

```python
- ollama.show('llama2')
+ ollama.show('llama3.1')
```

### Create

```python
modelfile='''
- FROM llama2
+ FROM llama3.1
SYSTEM You are mario from super mario bros.
'''

@@ -80,31 +80,43 @@ ollama.create(model='example', modelfile=modelfile)
### Copy

```python
- ollama.copy('llama2', 'user/llama2')
+ ollama.copy('llama3.1', 'user/llama3.1')
```

### Delete

```python
- ollama.delete('llama2')
+ ollama.delete('llama3.1')
```

### Pull

```python
- ollama.pull('llama2')
+ ollama.pull('llama3.1')
```

### Push

```python
- ollama.push('user/llama2')
+ ollama.push('user/llama3.1')
```

- ### Embeddings
+ ### Embed

```python
- ollama.embeddings(model='llama2', prompt='The sky is blue because of rayleigh scattering')
+ ollama.embed(model='llama3.1', input='The sky is blue because of rayleigh scattering')
```

### Embed (batch)

```python
ollama.embed(model='llama3.1', input=['The sky is blue because of rayleigh scattering', 'Grass is green because of chlorophyll'])
```

### Ps

```python
ollama.ps()
```

## Custom client
@@ -117,7 +129,7 @@ A custom client can be created with the following fields:
```python
from ollama import Client
client = Client(host='http://localhost:11434')
- response = client.chat(model='llama2', messages=[
+ response = client.chat(model='llama3.1', messages=[
{
'role': 'user',
'content': 'Why is the sky blue?',
@@ -133,7 +145,7 @@ from ollama import AsyncClient

async def chat():
message = {'role': 'user', 'content': 'Why is the sky blue?'}
- response = await AsyncClient().chat(model='llama2', messages=[message])
+ response = await AsyncClient().chat(model='llama3.1', messages=[message])

asyncio.run(chat())
```
@@ -146,7 +158,7 @@ from ollama import AsyncClient

async def chat():
message = {'role': 'user', 'content': 'Why is the sky blue?'}
- async for part in await AsyncClient().chat(model='llama2', messages=[message], stream=True):
+ async for part in await AsyncClient().chat(model='llama3.1', messages=[message], stream=True):
print(part['message']['content'], end='', flush=True)

asyncio.run(chat())
6 changes: 3 additions & 3 deletions examples/fill-in-middle/main.py
@@ -1,16 +1,16 @@
from ollama import generate

- prefix = '''def remove_non_ascii(s: str) -> str:
+ prompt = '''def remove_non_ascii(s: str) -> str:
""" '''

suffix = """
return result
"""


response = generate(
model='codellama:7b-code',
- prompt=f'<PRE> {prefix} <SUF>{suffix} <MID>',
+ prompt=prompt,
+ suffix=suffix,
options={
'num_predict': 128,
'temperature': 0,
31 changes: 31 additions & 0 deletions examples/ps/main.py
@@ -0,0 +1,31 @@
from ollama import ps, pull, chat

response = pull('mistral', stream=True)
progress_states = set()
for progress in response:
if progress.get('status') in progress_states:
continue
progress_states.add(progress.get('status'))
print(progress.get('status'))

print('\n')

response = chat('mistral', messages=[{'role': 'user', 'content': 'Hello!'}])
print(response['message']['content'])

print('\n')

response = ps()

name = response['models'][0]['name']
size = response['models'][0]['size']
size_vram = response['models'][0]['size_vram']

if size == size_vram:
print(f'{name}: 100% GPU')
elif not size_vram:
print(f'{name}: 100% CPU')
else:
size_cpu = size - size_vram
cpu_percent = round(size_cpu / size * 100)
print(f'{name}: {cpu_percent}% CPU/{100 - cpu_percent}% GPU')
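The CPU/GPU split arithmetic in the `ps` example above can be checked in isolation; the byte sizes below are hypothetical values, not real `ps()` output:

```python
# Standalone sketch of the split computed in examples/ps/main.py above.
# size and size_vram are hypothetical; ps() reports real values per model.
size = 4_000_000_000       # total model size in bytes
size_vram = 3_000_000_000  # portion resident in GPU memory

size_cpu = size - size_vram
cpu_percent = round(size_cpu / size * 100)
print(f'{cpu_percent}% CPU/{100 - cpu_percent}% GPU')  # prints: 25% CPU/75% GPU
```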
3 changes: 3 additions & 0 deletions examples/tools/README.md
@@ -0,0 +1,3 @@
# tools

This example demonstrates how to utilize tool calls with an asynchronous Ollama client and the chat endpoint.
87 changes: 87 additions & 0 deletions examples/tools/main.py
@@ -0,0 +1,87 @@
import json
import ollama
import asyncio


# Simulates an API call to get flight times
# In a real application, this would fetch data from a live database or API
def get_flight_times(departure: str, arrival: str) -> str:
flights = {
'NYC-LAX': {'departure': '08:00 AM', 'arrival': '11:30 AM', 'duration': '5h 30m'},
'LAX-NYC': {'departure': '02:00 PM', 'arrival': '10:30 PM', 'duration': '5h 30m'},
'LHR-JFK': {'departure': '10:00 AM', 'arrival': '01:00 PM', 'duration': '8h 00m'},
'JFK-LHR': {'departure': '09:00 PM', 'arrival': '09:00 AM', 'duration': '7h 00m'},
'CDG-DXB': {'departure': '11:00 AM', 'arrival': '08:00 PM', 'duration': '6h 00m'},
'DXB-CDG': {'departure': '03:00 AM', 'arrival': '07:30 AM', 'duration': '7h 30m'},
}

key = f'{departure}-{arrival}'.upper()
return json.dumps(flights.get(key, {'error': 'Flight not found'}))


async def run(model: str):
client = ollama.AsyncClient()
# Initialize conversation with a user query
messages = [{'role': 'user', 'content': 'What is the flight time from New York (NYC) to Los Angeles (LAX)?'}]

# First API call: Send the query and function description to the model
response = await client.chat(
model=model,
messages=messages,
tools=[
{
'type': 'function',
'function': {
'name': 'get_flight_times',
'description': 'Get the flight times between two cities',
'parameters': {
'type': 'object',
'properties': {
'departure': {
'type': 'string',
'description': 'The departure city (airport code)',
},
'arrival': {
'type': 'string',
'description': 'The arrival city (airport code)',
},
},
'required': ['departure', 'arrival'],
},
},
},
],
)

# Add the model's response to the conversation history
messages.append(response['message'])

# Check if the model decided to use the provided function
if not response['message'].get('tool_calls'):
print("The model didn't use the function. Its response was:")
print(response['message']['content'])
return

# Process function calls made by the model
if response['message'].get('tool_calls'):
available_functions = {
'get_flight_times': get_flight_times,
}
for tool in response['message']['tool_calls']:
function_to_call = available_functions[tool['function']['name']]
function_response = function_to_call(tool['function']['arguments']['departure'], tool['function']['arguments']['arrival'])
# Add function response to the conversation
messages.append(
{
'role': 'tool',
'content': function_response,
}
)

# Second API call: Get final response from the model
final_response = await client.chat(model=model, messages=messages)
print(final_response['message']['content'])


# Run the async function
asyncio.run(run('mistral'))
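The dispatch step in the tool-calling example above (look up the function named in `tool_calls`, call it with the model-supplied arguments) can be exercised without a running server. This sketch reproduces `get_flight_times` from the example and feeds it a hand-built tool call shaped like those the model returns:

```python
import json


# Reproduced from the example above so the snippet is self-contained
def get_flight_times(departure: str, arrival: str) -> str:
    flights = {
        'NYC-LAX': {'departure': '08:00 AM', 'arrival': '11:30 AM', 'duration': '5h 30m'},
    }
    key = f'{departure}-{arrival}'.upper()
    return json.dumps(flights.get(key, {'error': 'Flight not found'}))


available_functions = {'get_flight_times': get_flight_times}

# A hand-built tool call, shaped like an entry of response['message']['tool_calls']
tool = {'function': {'name': 'get_flight_times',
                     'arguments': {'departure': 'NYC', 'arrival': 'LAX'}}}

function_to_call = available_functions[tool['function']['name']]
function_response = function_to_call(tool['function']['arguments']['departure'],
                                     tool['function']['arguments']['arrival'])
print(json.loads(function_response)['duration'])  # prints: 5h 30m
```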
4 changes: 4 additions & 0 deletions ollama/__init__.py
@@ -21,6 +21,7 @@
'ResponseError',
'generate',
'chat',
+ 'embed',
'embeddings',
'pull',
'push',
@@ -29,12 +30,14 @@
'list',
'copy',
'show',
+ 'ps',
]

_client = Client()

generate = _client.generate
chat = _client.chat
+ embed = _client.embed
embeddings = _client.embeddings
pull = _client.pull
push = _client.push
Expand All @@ -43,3 +46,4 @@
list = _client.list
copy = _client.copy
show = _client.show
+ ps = _client.ps