Merge pull request #1 from ollama/main
Pull Main
N-Vlahovic authored Nov 3, 2024
2 parents 2fde317 + ebe332b commit a38ad56
Showing 12 changed files with 733 additions and 153 deletions.
46 changes: 29 additions & 17 deletions README.md
@@ -1,6 +1,6 @@
# Ollama Python Library

The Ollama Python library provides the easiest way to integrate Python 3.8+ projects with [Ollama](https://github.com/jmorganca/ollama).
The Ollama Python library provides the easiest way to integrate Python 3.8+ projects with [Ollama](https://github.com/ollama/ollama).

## Install

@@ -12,7 +12,7 @@ pip install ollama

```python
import ollama
response = ollama.chat(model='llama2', messages=[
response = ollama.chat(model='llama3.1', messages=[
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
@@ -29,7 +29,7 @@ Response streaming can be enabled by setting `stream=True`, modifying function c
import ollama

stream = ollama.chat(
  model='llama2',
  model='llama3.1',
  messages=[{'role': 'user', 'content': 'Why is the sky blue?'}],
  stream=True,
)
@@ -40,18 +40,18 @@ for chunk in stream:

## API

The Ollama Python library's API is designed around the [Ollama REST API](https://github.com/jmorganca/ollama/blob/main/docs/api.md)
The Ollama Python library's API is designed around the [Ollama REST API](https://github.com/ollama/ollama/blob/main/docs/api.md)

### Chat

```python
ollama.chat(model='llama2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
ollama.chat(model='llama3.1', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
```

### Generate

```python
ollama.generate(model='llama2', prompt='Why is the sky blue?')
ollama.generate(model='llama3.1', prompt='Why is the sky blue?')
```

### List
@@ -63,14 +63,14 @@ ollama.list()
### Show

```python
ollama.show('llama2')
ollama.show('llama3.1')
```

### Create

```python
modelfile='''
FROM llama2
FROM llama3.1
SYSTEM You are mario from super mario bros.
'''

@@ -80,31 +80,43 @@ ollama.create(model='example', modelfile=modelfile)
### Copy

```python
ollama.copy('llama2', 'user/llama2')
ollama.copy('llama3.1', 'user/llama3.1')
```

### Delete

```python
ollama.delete('llama2')
ollama.delete('llama3.1')
```

### Pull

```python
ollama.pull('llama2')
ollama.pull('llama3.1')
```

### Push

```python
ollama.push('user/llama2')
ollama.push('user/llama3.1')
```

### Embeddings
### Embed

```python
ollama.embeddings(model='llama2', prompt='The sky is blue because of rayleigh scattering')
ollama.embed(model='llama3.1', input='The sky is blue because of rayleigh scattering')
```

### Embed (batch)

```python
ollama.embed(model='llama3.1', input=['The sky is blue because of rayleigh scattering', 'Grass is green because of chlorophyll'])
```

### Ps

```python
ollama.ps()
```

## Custom client
@@ -117,7 +129,7 @@ A custom client can be created with the following fields:
```python
from ollama import Client
client = Client(host='http://localhost:11434')
response = client.chat(model='llama2', messages=[
response = client.chat(model='llama3.1', messages=[
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
@@ -133,7 +145,7 @@ from ollama import AsyncClient

async def chat():
  message = {'role': 'user', 'content': 'Why is the sky blue?'}
  response = await AsyncClient().chat(model='llama2', messages=[message])
  response = await AsyncClient().chat(model='llama3.1', messages=[message])

asyncio.run(chat())
```
@@ -146,7 +158,7 @@ from ollama import AsyncClient

async def chat():
  message = {'role': 'user', 'content': 'Why is the sky blue?'}
  async for part in await AsyncClient().chat(model='llama2', messages=[message], stream=True):
  async for part in await AsyncClient().chat(model='llama3.1', messages=[message], stream=True):
    print(part['message']['content'], end='', flush=True)

asyncio.run(chat())
6 changes: 3 additions & 3 deletions examples/fill-in-middle/main.py
@@ -1,16 +1,16 @@
from ollama import generate

prefix = '''def remove_non_ascii(s: str) -> str:
prompt = '''def remove_non_ascii(s: str) -> str:
  """ '''

suffix = """
  return result
"""


response = generate(
  model='codellama:7b-code',
  prompt=f'<PRE> {prefix} <SUF>{suffix} <MID>',
  prompt=prompt,
  suffix=suffix,
  options={
    'num_predict': 128,
    'temperature': 0,
31 changes: 31 additions & 0 deletions examples/ps/main.py
@@ -0,0 +1,31 @@
from ollama import ps, pull, chat

response = pull('mistral', stream=True)
progress_states = set()
for progress in response:
  if progress.get('status') in progress_states:
    continue
  progress_states.add(progress.get('status'))
  print(progress.get('status'))

print('\n')

response = chat('mistral', messages=[{'role': 'user', 'content': 'Hello!'}])
print(response['message']['content'])

print('\n')

response = ps()

name = response['models'][0]['name']
size = response['models'][0]['size']
size_vram = response['models'][0]['size_vram']

if size == size_vram:
  print(f'{name}: 100% GPU')
elif not size_vram:
  print(f'{name}: 100% CPU')
else:
  size_cpu = size - size_vram
  cpu_percent = round(size_cpu / size * 100)
  print(f'{name}: {cpu_percent}% CPU/{100 - cpu_percent}% GPU')
3 changes: 3 additions & 0 deletions examples/tools/README.md
@@ -0,0 +1,3 @@
# tools

This example demonstrates how to utilize tool calls with an asynchronous Ollama client and the chat endpoint.
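
For orientation, here is a condensed, hypothetical sketch of the round trip this example performs (not part of the commit itself). It assumes a locally running Ollama server with a tool-capable model such as `mistral`, and only prints the tool call the model requests; the full runnable version is `examples/tools/main.py` below.

```python
import asyncio

import ollama


async def main():
  # Offer the model one callable tool, described with a JSON schema.
  response = await ollama.AsyncClient().chat(
    model='mistral',
    messages=[{'role': 'user', 'content': 'What is the flight time from NYC to LAX?'}],
    tools=[
      {
        'type': 'function',
        'function': {
          'name': 'get_flight_times',
          'description': 'Get the flight times between two cities',
          'parameters': {
            'type': 'object',
            'properties': {
              'departure': {'type': 'string', 'description': 'Departure airport code'},
              'arrival': {'type': 'string', 'description': 'Arrival airport code'},
            },
            'required': ['departure', 'arrival'],
          },
        },
      },
    ],
  )

  # When the model decides to use the tool, its request arrives in
  # message tool_calls rather than as plain text content.
  for tool in response['message'].get('tool_calls') or []:
    print(tool['function']['name'], tool['function']['arguments'])


asyncio.run(main())
```
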
87 changes: 87 additions & 0 deletions examples/tools/main.py
@@ -0,0 +1,87 @@
import json
import ollama
import asyncio


# Simulates an API call to get flight times
# In a real application, this would fetch data from a live database or API
def get_flight_times(departure: str, arrival: str) -> str:
  flights = {
    'NYC-LAX': {'departure': '08:00 AM', 'arrival': '11:30 AM', 'duration': '5h 30m'},
    'LAX-NYC': {'departure': '02:00 PM', 'arrival': '10:30 PM', 'duration': '5h 30m'},
    'LHR-JFK': {'departure': '10:00 AM', 'arrival': '01:00 PM', 'duration': '8h 00m'},
    'JFK-LHR': {'departure': '09:00 PM', 'arrival': '09:00 AM', 'duration': '7h 00m'},
    'CDG-DXB': {'departure': '11:00 AM', 'arrival': '08:00 PM', 'duration': '6h 00m'},
    'DXB-CDG': {'departure': '03:00 AM', 'arrival': '07:30 AM', 'duration': '7h 30m'},
  }

  key = f'{departure}-{arrival}'.upper()
  return json.dumps(flights.get(key, {'error': 'Flight not found'}))


async def run(model: str):
  client = ollama.AsyncClient()
  # Initialize conversation with a user query
  messages = [{'role': 'user', 'content': 'What is the flight time from New York (NYC) to Los Angeles (LAX)?'}]

  # First API call: Send the query and function description to the model
  response = await client.chat(
    model=model,
    messages=messages,
    tools=[
      {
        'type': 'function',
        'function': {
          'name': 'get_flight_times',
          'description': 'Get the flight times between two cities',
          'parameters': {
            'type': 'object',
            'properties': {
              'departure': {
                'type': 'string',
                'description': 'The departure city (airport code)',
              },
              'arrival': {
                'type': 'string',
                'description': 'The arrival city (airport code)',
              },
            },
            'required': ['departure', 'arrival'],
          },
        },
      },
    ],
  )

  # Add the model's response to the conversation history
  messages.append(response['message'])

  # Check if the model decided to use the provided function
  if not response['message'].get('tool_calls'):
    print("The model didn't use the function. Its response was:")
    print(response['message']['content'])
    return

  # Process function calls made by the model
  if response['message'].get('tool_calls'):
    available_functions = {
      'get_flight_times': get_flight_times,
    }
    for tool in response['message']['tool_calls']:
      function_to_call = available_functions[tool['function']['name']]
      function_response = function_to_call(tool['function']['arguments']['departure'], tool['function']['arguments']['arrival'])
      # Add function response to the conversation
      messages.append(
        {
          'role': 'tool',
          'content': function_response,
        }
      )

  # Second API call: Get final response from the model
  final_response = await client.chat(model=model, messages=messages)
  print(final_response['message']['content'])


# Run the async function
asyncio.run(run('mistral'))
4 changes: 4 additions & 0 deletions ollama/__init__.py
@@ -21,6 +21,7 @@
  'ResponseError',
  'generate',
  'chat',
  'embed',
  'embeddings',
  'pull',
  'push',
@@ -29,12 +30,14 @@
  'list',
  'copy',
  'show',
  'ps',
]

_client = Client()

generate = _client.generate
chat = _client.chat
embed = _client.embed
embeddings = _client.embeddings
pull = _client.pull
push = _client.push
@@ -43,3 +46,4 @@
list = _client.list
copy = _client.copy
show = _client.show
ps = _client.ps