Skip to content

Commit daacc2e

Browse files
authored
Merge pull request #340 from jhc13/replace-auto-gptq
Replace AutoGPTQ with GPTQModel
2 parents c52d6df + 098e672 commit daacc2e

File tree

11 files changed: 117 additions and 116 deletions.

README.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,14 +32,14 @@ extract the files if you don't have it on your system.
3232
manually (see below).
3333
- Linux users: You may need to install `libxcb-cursor0`.
3434
(See [this Stack Overflow answer](https://stackoverflow.com/a/75941575).) You
35-
may also have to install `python3.11-dev` or `python3.10-dev` (depending on
35+
may also have to install `python3.12-dev` or `python3.11-dev` (depending on
3636
your Python version) if you get an error while trying to use a CogVLM2
3737
model. (See [this issue](https://github.com/jhc13/taggui/issues/234).)
3838

3939
Alternatively, you can install manually by cloning this repository and
4040
installing the dependencies in `requirements.txt`.
4141
Run `taggui/run_gui.py` to start the program.
42-
Python 3.11 is recommended, but Python 3.10 should also work.
42+
Python 3.12 is recommended, but Python 3.11 should also work.
4343

4444
## Usage
4545

requirements.txt

Lines changed: 27 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -1,40 +1,41 @@
1-
accelerate==1.1.0
2-
bitsandbytes==0.45.0
1+
accelerate==1.4.0
2+
bitsandbytes==0.45.2
33
ExifRead==3.0.0
44
imagesize==1.4.1
5-
pillow==11.0.0
6-
pyparsing==3.2.0
7-
PySide6==6.8.1
8-
transformers==4.45.2
9-
10-
# PyTorch
11-
# AutoGPTQ does not support PyTorch v2.3.
12-
torch==2.2.2; platform_system != "Windows"
13-
https://download.pytorch.org/whl/cu121/torch-2.2.2%2Bcu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
14-
https://download.pytorch.org/whl/cu121/torch-2.2.2%2Bcu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
5+
pillow==11.1.0
6+
pyparsing==3.2.1
7+
PySide6==6.8.2.1
8+
transformers==4.48.3
159

1610
# CogAgent
17-
timm==1.0.12
11+
timm==1.0.14
1812

1913
# CogVLM
20-
einops==0.8.0
21-
protobuf==5.29.1
14+
einops==0.8.1
15+
protobuf==5.29.3
2216
sentencepiece==0.2.0
23-
# These versions of torchvision and xFormers are the latest versions compatible
24-
# with PyTorch v2.2.2.
25-
torchvision==0.17.2
26-
xformers==0.0.25.post1
17+
torchvision==0.21.0
18+
xformers==0.0.29.post3
2719

2820
# InternLM-XComposer2
29-
auto-gptq==0.7.1; platform_system == "Linux" or platform_system == "Windows"
30-
# PyTorch versions prior to 2.3 do not support NumPy v2.
31-
numpy==1.26.4
21+
gptqmodel==1.9.0
22+
numpy==2.2.3
3223

3324
# WD Tagger
34-
huggingface-hub==0.26.5
25+
huggingface-hub==0.29.1
3526
onnxruntime==1.20.1
3627

28+
# PyTorch
29+
torch==2.6.0; platform_system != "Windows"
30+
https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp312-cp312-win_amd64.whl; platform_system == "Windows" and python_version == "3.12"
31+
https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
32+
3733
# FlashAttention (Florence-2, Phi-3-Vision)
38-
flash-attn==2.6.3; platform_system == "Linux"
39-
https://github.com/bdashore3/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
40-
https://github.com/bdashore3/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
34+
flash-attn==2.7.4.post1; platform_system == "Linux"
35+
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp312-cp312-win_amd64.whl; platform_system == "Windows" and python_version == "3.12"
36+
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
37+
38+
# Triton (CogVLM2)
39+
triton==3.2.0; platform_system == "Linux"
40+
https://github.com/woct0rdho/triton-windows/releases/download/v3.2.0-windows.post10/triton-3.2.0-cp312-cp312-win_amd64.whl; platform_system == "Windows" and python_version == "3.12"
41+
https://github.com/woct0rdho/triton-windows/releases/download/v3.2.0-windows.post10/triton-3.2.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

taggui-windows.spec

Lines changed: 0 additions & 61 deletions
This file was deleted.

taggui-linux.spec renamed to taggui.spec

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,12 @@ from PyInstaller.utils.hooks import collect_data_files
33

44
datas = [('clip-vit-base-patch32', 'clip-vit-base-patch32'),
55
('images/icon.ico', 'images')]
6-
datas += [('/usr/include/python3.11', 'include/python3.11')]
76
datas += collect_data_files('triton')
87
datas += collect_data_files('xformers')
9-
hiddenimports = ['timm.models.layers']
8+
hiddenimports = [
9+
'timm.models.layers',
10+
'xformers._C',
11+
]
1012

1113
block_cipher = None
1214

@@ -26,7 +28,6 @@ a = Analysis(
2628
cipher=block_cipher,
2729
noarchive=False,
2830
module_collection_mode={
29-
'auto_gptq': 'pyz+py',
3031
'triton': 'py',
3132
'xformers': 'pyz+py',
3233
},

taggui/auto_captioning/auto_captioning_model.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,11 @@ def replace_template_variables(text: str, image: Image) -> str:
3737

3838
class AutoCaptioningModel:
3939
dtype = torch.float16
40+
# When loading a model, if the `use_safetensors` argument is not set and
41+
# both a safetensors and a non-safetensors version of the model are
42+
# available, both versions get downloaded. This should be set to `None` for
43+
# models that do not have a safetensors version.
44+
use_safetensors = True
4045
model_load_context_manager = nullcontext()
4146
transformers_model_class = AutoModelForVision2Seq
4247
image_mode = 'RGB'
@@ -90,7 +95,8 @@ def get_processor(self):
9095
trust_remote_code=True)
9196

9297
def get_model_load_arguments(self) -> dict:
93-
arguments = {'device_map': self.device, 'trust_remote_code': True}
98+
arguments = {'device_map': self.device, 'trust_remote_code': True,
99+
'use_safetensors': self.use_safetensors}
94100
if self.load_in_4_bit:
95101
quantization_config = BitsAndBytesConfig(
96102
load_in_4bit=True,
@@ -163,7 +169,7 @@ def load_processor_and_model(self):
163169
self.thread_parent.is_model_loaded_in_4_bit = self.load_in_4_bit
164170

165171
def monkey_patch_after_loading(self):
166-
pass
172+
return
167173

168174
@staticmethod
169175
def get_captioning_start_datetime_string(

taggui/auto_captioning/models/cog.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ class Cogvlm(Cog):
9191

9292
def monkey_patch_after_loading(self):
9393
"""
94-
Monkey patch CogVLM to support `caption_start`. This has to be done
94+
Monkey patch the model to support `caption_start`. This has to be done
9595
every time after loading because `caption_start` might have changed.
9696
"""
9797
cogvlm_module = next(
@@ -118,7 +118,9 @@ class Cogagent(Cog):
118118
template_version = 'chat_old'
119119

120120
def monkey_patch_after_loading(self):
121-
"""Monkey patch CogAgent to support beam search and `caption_start`."""
121+
"""
122+
Monkey patch the model to support beam search and `caption_start`.
123+
"""
122124
cogagent_module = next(module
123125
for module_name, module in sys.modules.items()
124126
if 'modeling_cogagent' in module_name)

taggui/auto_captioning/models/cogvlm2.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,8 @@ class Cogvlm2(AutoCaptioningModel):
1919
def get_additional_error_message(self) -> str | None:
2020
if not importlib.util.find_spec('triton'):
2121
return ('This model requires the `triton` package, which is only '
22-
'available on Linux. Therefore, this model cannot be run '
23-
'on this system.')
22+
'available for Linux and Windows. Therefore, this model '
23+
'cannot be run on this system.')
2424
is_4_bit_model = 'int4' in self.model_id
2525
if is_4_bit_model:
2626
if self.device_setting == CaptionDevice.CPU:

taggui/auto_captioning/models/florence_2.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55

66

77
class Florence2(AutoCaptioningModel):
8+
use_safetensors = None
89
transformers_model_class = AutoModelForCausalLM
910
task_prompts = [
1011
'<CAPTION>',
@@ -30,6 +31,7 @@ def get_default_prompt(self) -> str:
3031

3132

3233
class Florence2Promptgen(Florence2):
34+
use_safetensors = True
3335
task_prompts = [
3436
'<GENERATE_PROMPT>',
3537
'<CAPTION>',

taggui/auto_captioning/models/llava_llama_3.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,15 @@
1+
from transformers import AutoConfig, AutoProcessor
2+
13
from auto_captioning.auto_captioning_model import AutoCaptioningModel
24

35

46
class LlavaLlama3(AutoCaptioningModel):
7+
def get_processor(self):
8+
config = AutoConfig.from_pretrained(self.model_id)
9+
patch_size = config.vision_config.patch_size
10+
return AutoProcessor.from_pretrained(
11+
self.model_id, trust_remote_code=True, patch_size=patch_size)
12+
513
@staticmethod
614
def get_default_prompt() -> str:
715
return 'Describe the image in one sentence.'

taggui/auto_captioning/models/moondream.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@
99
from auto_captioning.auto_captioning_model import AutoCaptioningModel
1010
from utils.image import Image
1111

12+
MOONDREAM2_REVISION = '2024-08-26'
13+
1214

1315
class Moondream(AutoCaptioningModel):
1416
transformers_model_class = AutoModelForCausalLM
@@ -95,6 +97,11 @@ def get_processor(self):
9597
return AutoTokenizer.from_pretrained(self.model_id,
9698
trust_remote_code=True)
9799

100+
def get_model_load_arguments(self) -> dict:
101+
arguments = super().get_model_load_arguments()
102+
arguments['revision'] = MOONDREAM2_REVISION
103+
return arguments
104+
98105
def get_model_inputs(self, image_prompt: str, image: Image) -> dict:
99106
text = self.get_input_text(image_prompt)
100107
pil_image = self.load_image(image)

0 commit comments

Comments
 (0)