
examples: update to use gemma3 #543

Merged: 1 commit, Jul 22, 2025
32 changes: 16 additions & 16 deletions README.md
@@ -5,7 +5,7 @@ The Ollama Python library provides the easiest way to integrate Python 3.8+ proj
## Prerequisites

- [Ollama](https://ollama.com/download) should be installed and running
-- Pull a model to use with the library: `ollama pull <model>` e.g. `ollama pull llama3.2`
+- Pull a model to use with the library: `ollama pull <model>` e.g. `ollama pull gemma3`
- See [Ollama.com](https://ollama.com/search) for more information on the models available.

## Install
@@ -20,7 +20,7 @@ pip install ollama
from ollama import chat
from ollama import ChatResponse

-response: ChatResponse = chat(model='llama3.2', messages=[
+response: ChatResponse = chat(model='gemma3', messages=[
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
@@ -41,7 +41,7 @@ Response streaming can be enabled by setting `stream=True`.
from ollama import chat

stream = chat(
-  model='llama3.2',
+  model='gemma3',
  messages=[{'role': 'user', 'content': 'Why is the sky blue?'}],
  stream=True,
)
@@ -61,7 +61,7 @@ client = Client(
  host='http://localhost:11434',
  headers={'x-some-header': 'some-value'}
)
-response = client.chat(model='llama3.2', messages=[
+response = client.chat(model='gemma3', messages=[
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
@@ -79,7 +79,7 @@ from ollama import AsyncClient

async def chat():
  message = {'role': 'user', 'content': 'Why is the sky blue?'}
-  response = await AsyncClient().chat(model='llama3.2', messages=[message])
+  response = await AsyncClient().chat(model='gemma3', messages=[message])

asyncio.run(chat())
```
@@ -92,7 +92,7 @@ from ollama import AsyncClient

async def chat():
  message = {'role': 'user', 'content': 'Why is the sky blue?'}
-  async for part in await AsyncClient().chat(model='llama3.2', messages=[message], stream=True):
+  async for part in await AsyncClient().chat(model='gemma3', messages=[message], stream=True):
    print(part['message']['content'], end='', flush=True)

asyncio.run(chat())
@@ -105,13 +105,13 @@ The Ollama Python library's API is designed around the [Ollama REST API](https:/
### Chat

```python
-ollama.chat(model='llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
+ollama.chat(model='gemma3', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
```

### Generate

```python
-ollama.generate(model='llama3.2', prompt='Why is the sky blue?')
+ollama.generate(model='gemma3', prompt='Why is the sky blue?')
```

### List
@@ -123,49 +123,49 @@ ollama.list()
### Show

```python
-ollama.show('llama3.2')
+ollama.show('gemma3')
```

### Create

```python
-ollama.create(model='example', from_='llama3.2', system="You are Mario from Super Mario Bros.")
+ollama.create(model='example', from_='gemma3', system="You are Mario from Super Mario Bros.")
```

### Copy

```python
-ollama.copy('llama3.2', 'user/llama3.2')
+ollama.copy('gemma3', 'user/gemma3')
```

### Delete

```python
-ollama.delete('llama3.2')
+ollama.delete('gemma3')
```

### Pull

```python
-ollama.pull('llama3.2')
+ollama.pull('gemma3')
```

### Push

```python
-ollama.push('user/llama3.2')
+ollama.push('user/gemma3')
```

### Embed

```python
-ollama.embed(model='llama3.2', input='The sky is blue because of rayleigh scattering')
+ollama.embed(model='gemma3', input='The sky is blue because of rayleigh scattering')
```

### Embed (batch)

```python
-ollama.embed(model='llama3.2', input=['The sky is blue because of rayleigh scattering', 'Grass is green because of chlorophyll'])
+ollama.embed(model='gemma3', input=['The sky is blue because of rayleigh scattering', 'Grass is green because of chlorophyll'])
```

### Ps
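Note: because the examples now default to `gemma3`, anyone who has only pulled `llama3.2` will hit a missing-model error on first run. A minimal sketch of the pull-on-404 pattern from the library's error-handling docs (not part of this diff):

```python
import ollama

model = 'gemma3'
try:
  ollama.chat(model=model, messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
except ollama.ResponseError as e:
  print('Error:', e.error)
  if e.status_code == 404:
    # The model has not been pulled yet; fetch it before retrying
    ollama.pull(model)
```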
2 changes: 1 addition & 1 deletion examples/async-chat.py
@@ -12,7 +12,7 @@ async def main():
  ]

  client = AsyncClient()
-  response = await client.chat('llama3.2', messages=messages)
+  response = await client.chat('gemma3', messages=messages)
  print(response['message']['content'])


2 changes: 1 addition & 1 deletion examples/async-generate.py
@@ -5,7 +5,7 @@

async def main():
  client = ollama.AsyncClient()
-  response = await client.generate('llama3.2', 'Why is the sky blue?')
+  response = await client.generate('gemma3', 'Why is the sky blue?')
  print(response['response'])


4 changes: 1 addition & 3 deletions examples/chat-stream.py
@@ -7,7 +7,5 @@
  },
]

-for part in chat('llama3.2', messages=messages, stream=True):
+for part in chat('gemma3', messages=messages, stream=True):
  print(part['message']['content'], end='', flush=True)
-
-print()
2 changes: 1 addition & 1 deletion examples/chat-with-history.py
@@ -22,7 +22,7 @@
while True:
  user_input = input('Chat with history: ')
  response = chat(
-    'llama3.2',
+    'gemma3',
    messages=[*messages, {'role': 'user', 'content': user_input}],
  )

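The hunk above only shows the call site; the point of this example is carrying the conversation forward between turns. A minimal sketch of that history-keeping pattern (illustrative, not the file's exact contents beyond this hunk):

```python
from ollama import chat

messages = []  # accumulated conversation history

while True:
  user_input = input('Chat with history: ')
  response = chat(
    'gemma3',
    messages=[*messages, {'role': 'user', 'content': user_input}],
  )
  # Keep both sides of the exchange so the next turn sees the full history
  messages += [
    {'role': 'user', 'content': user_input},
    {'role': 'assistant', 'content': response.message.content},
  ]
  print(response.message.content + '\n')
```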
2 changes: 1 addition & 1 deletion examples/chat.py
@@ -7,5 +7,5 @@
  },
]

-response = chat('llama3.2', messages=messages)
+response = chat('gemma3', messages=messages)
print(response['message']['content'])
2 changes: 1 addition & 1 deletion examples/create.py
@@ -3,7 +3,7 @@
client = Client()
response = client.create(
  model='my-assistant',
-  from_='llama3.2',
+  from_='gemma3',
  system='You are mario from Super Mario Bros.',
  stream=False,
)
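Once created, `my-assistant` behaves like any other local model, so the gemma3 base is only referenced at creation time. A short usage sketch (not part of this diff):

```python
from ollama import chat

# 'my-assistant' is the model created from the gemma3 base above
response = chat('my-assistant', messages=[{'role': 'user', 'content': 'Who are you?'}])
print(response.message.content)
```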
2 changes: 1 addition & 1 deletion examples/generate-stream.py
@@ -1,4 +1,4 @@
from ollama import generate

-for part in generate('llama3.2', 'Why is the sky blue?', stream=True):
+for part in generate('gemma3', 'Why is the sky blue?', stream=True):
  print(part['response'], end='', flush=True)
2 changes: 1 addition & 1 deletion examples/generate.py
@@ -1,4 +1,4 @@
from ollama import generate

-response = generate('llama3.2', 'Why is the sky blue?')
+response = generate('gemma3', 'Why is the sky blue?')
print(response['response'])
2 changes: 1 addition & 1 deletion examples/multimodal-chat.py
@@ -11,7 +11,7 @@
# img = Path(path).read_bytes()

response = chat(
-  model='llama3.2-vision',
+  model='gemma3',
  messages=[
    {
      'role': 'user',
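For context, the model swap works here because gemma3 accepts image inputs. A minimal sketch of the message shape this example relies on (the image path is a placeholder):

```python
from ollama import chat

response = chat(
  model='gemma3',
  messages=[
    {
      'role': 'user',
      'content': 'What is in this image? Be concise.',
      # 'images' accepts file paths or raw bytes; this path is a placeholder
      'images': ['path/to/image.png'],
    },
  ],
)
print(response.message.content)
```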
4 changes: 2 additions & 2 deletions examples/ps.py
@@ -1,7 +1,7 @@
from ollama import ProcessResponse, chat, ps, pull

# Ensure at least one model is loaded
-response = pull('llama3.2', stream=True)
+response = pull('gemma3', stream=True)
progress_states = set()
for progress in response:
  if progress.get('status') in progress_states:
@@ -12,7 +12,7 @@
print('\n')

print('Waiting for model to load... \n')
-chat(model='llama3.2', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])
+chat(model='gemma3', messages=[{'role': 'user', 'content': 'Why is the sky blue?'}])


response: ProcessResponse = ps()
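The collapsed tail of this file prints details for each running model. A sketch of how the `ProcessResponse` returned by `ps()` can be inspected (field names follow the library's typed response; shown as an illustration, not the file's exact code):

```python
from ollama import ProcessResponse, ps

response: ProcessResponse = ps()
for model in response.models:
  print('Model:', model.model)
  print('  Size (bytes):', model.size)
  print('  VRAM (bytes):', model.size_vram)
```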
2 changes: 1 addition & 1 deletion examples/pull.py
@@ -3,7 +3,7 @@
from ollama import pull

current_digest, bars = '', {}
-for progress in pull('llama3.2', stream=True):
+for progress in pull('gemma3', stream=True):
  digest = progress.get('digest', '')
  if digest != current_digest and current_digest in bars:
    bars[current_digest].close()
2 changes: 1 addition & 1 deletion examples/structured-outputs-image.py
@@ -33,7 +33,7 @@ class ImageDescription(BaseModel):

# Set up chat as usual
response = chat(
-  model='llama3.2-vision',
+  model='gemma3',
  format=ImageDescription.model_json_schema(),  # Pass in the schema for the response
  messages=[
    {
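The surrounding file defines an `ImageDescription` Pydantic model and validates the response against it. A minimal text-only sketch of that structured-output round trip with gemma3 (simplified, so not this file's exact code):

```python
from ollama import chat
from pydantic import BaseModel


class Pet(BaseModel):
  name: str
  animal: str
  age: int


response = chat(
  model='gemma3',
  format=Pet.model_json_schema(),  # constrain the reply to this JSON schema
  messages=[{'role': 'user', 'content': 'I have a 3 year old cat named Luna.'}],
)

# Parse and validate the JSON the model returned
pet = Pet.model_validate_json(response.message.content)
print(pet)
```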