Merge pull request #340 from jhc13/replace-auto-gptq

jhc13 · web-flow · commit daacc2e82371 · 2025-02-22T13:52:19.000-05:00
Replace AutoGPTQ with GPTQModel
diff --git a/README.md b/README.md
@@ -32,14 +32,14 @@ extract the files if you don't have it on your system.
   manually (see below).
 - Linux users: You may need to install `libxcb-cursor0`.
   (See [this Stack Overflow answer](https://stackoverflow.com/a/75941575).) You
-  may also have to install `python3.11-dev` or `python3.10-dev` (depending on
+  may also have to install `python3.12-dev` or `python3.11-dev` (depending on
   your Python version) if you get an error while trying to use a CogVLM2
   model. (See [this issue](https://github.com/jhc13/taggui/issues/234).)
 
 Alternatively, you can install manually by cloning this repository and
 installing the dependencies in `requirements.txt`.
 Run `taggui/run_gui.py` to start the program.
-Python 3.11 is recommended, but Python 3.10 should also work.
+Python 3.12 is recommended, but Python 3.11 should also work.
 
 ## Usage
 
diff --git a/requirements.txt b/requirements.txt
@@ -1,40 +1,41 @@
-accelerate==1.1.0
-bitsandbytes==0.45.0
+accelerate==1.4.0
+bitsandbytes==0.45.2
 ExifRead==3.0.0
 imagesize==1.4.1
-pillow==11.0.0
-pyparsing==3.2.0
-PySide6==6.8.1
-transformers==4.45.2
-
-# PyTorch
-# AutoGPTQ does not support PyTorch v2.3.
-torch==2.2.2; platform_system != "Windows"
-https://download.pytorch.org/whl/cu121/torch-2.2.2%2Bcu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://download.pytorch.org/whl/cu121/torch-2.2.2%2Bcu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+pillow==11.1.0
+pyparsing==3.2.1
+PySide6==6.8.2.1
+transformers==4.48.3
 
 # CogAgent
-timm==1.0.12
+timm==1.0.14
 
 # CogVLM
-einops==0.8.0
-protobuf==5.29.1
+einops==0.8.1
+protobuf==5.29.3
 sentencepiece==0.2.0
-# These versions of torchvision and xFormers are the latest versions compatible
-# with PyTorch v2.2.2.
-torchvision==0.17.2
-xformers==0.0.25.post1
+torchvision==0.21.0
+xformers==0.0.29.post3
 
 # InternLM-XComposer2
-auto-gptq==0.7.1; platform_system == "Linux" or platform_system == "Windows"
-# PyTorch versions prior to 2.3 do not support NumPy v2.
-numpy==1.26.4
+gptqmodel==1.9.0
+numpy==2.2.3
 
 # WD Tagger
-huggingface-hub==0.26.5
+huggingface-hub==0.29.1
 onnxruntime==1.20.1
 
+# PyTorch
+torch==2.6.0; platform_system != "Windows"
+https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp312-cp312-win_amd64.whl; platform_system == "Windows" and python_version == "3.12"
+https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+
 # FlashAttention (Florence-2, Phi-3-Vision)
-flash-attn==2.6.3; platform_system == "Linux"
-https://github.com/bdashore3/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/bdashore3/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+flash-attn==2.7.4.post1; platform_system == "Linux"
+https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp312-cp312-win_amd64.whl; platform_system == "Windows" and python_version == "3.12"
+https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+
+# Triton (CogVLM2)
+triton==3.2.0; platform_system == "Linux"
+https://github.com/woct0rdho/triton-windows/releases/download/v3.2.0-windows.post10/triton-3.2.0-cp312-cp312-win_amd64.whl; platform_system == "Windows" and python_version == "3.12"
+https://github.com/woct0rdho/triton-windows/releases/download/v3.2.0-windows.post10/triton-3.2.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/taggui-windows.spec b/taggui-windows.spec
diff --git a/taggui.spec b/taggui.spec
@@ -3,10 +3,12 @@ from PyInstaller.utils.hooks import collect_data_files
 
 datas = [('clip-vit-base-patch32', 'clip-vit-base-patch32'),
          ('images/icon.ico', 'images')]
-datas += [('/usr/include/python3.11', 'include/python3.11')]
 datas += collect_data_files('triton')
 datas += collect_data_files('xformers')
-hiddenimports = ['timm.models.layers']
+hiddenimports = [
+    'timm.models.layers',
+    'xformers._C',
+]
 
 block_cipher = None
 
@@ -26,7 +28,6 @@ a = Analysis(
     cipher=block_cipher,
     noarchive=False,
     module_collection_mode={
-        'auto_gptq': 'pyz+py',
         'triton': 'py',
         'xformers': 'pyz+py',
     },
diff --git a/taggui/auto_captioning/auto_captioning_model.py b/taggui/auto_captioning/auto_captioning_model.py
@@ -37,6 +37,11 @@ def replace_template_variables(text: str, image: Image) -> str:
 
 class AutoCaptioningModel:
     dtype = torch.float16
+    # If `use_safetensors` is left unset when loading a model and the model
+    # has both a safetensors and a non-safetensors version, both versions get
+    # downloaded, so it is set explicitly here. Override this with `None` in
+    # subclasses for models that do not have a safetensors version.
+    use_safetensors = True
     model_load_context_manager = nullcontext()
     transformers_model_class = AutoModelForVision2Seq
     image_mode = 'RGB'
@@ -90,7 +95,8 @@ def get_processor(self):
                                              trust_remote_code=True)
 
     def get_model_load_arguments(self) -> dict:
-        arguments = {'device_map': self.device, 'trust_remote_code': True}
+        arguments = {'device_map': self.device, 'trust_remote_code': True,
+                     'use_safetensors': self.use_safetensors}
         if self.load_in_4_bit:
             quantization_config = BitsAndBytesConfig(
                 load_in_4bit=True,
@@ -163,7 +169,7 @@ def load_processor_and_model(self):
         self.thread_parent.is_model_loaded_in_4_bit = self.load_in_4_bit
 
     def monkey_patch_after_loading(self):
-        pass
+        return
 
     @staticmethod
     def get_captioning_start_datetime_string(
diff --git a/taggui/auto_captioning/models/cog.py b/taggui/auto_captioning/models/cog.py
@@ -91,7 +91,7 @@ class Cogvlm(Cog):
 
     def monkey_patch_after_loading(self):
         """
-        Monkey patch CogVLM to support `caption_start`. This has to be done
+        Monkey patch the model to support `caption_start`. This has to be done
         every time after loading because `caption_start` might have changed.
         """
         cogvlm_module = next(
@@ -118,7 +118,9 @@ class Cogagent(Cog):
     template_version = 'chat_old'
 
     def monkey_patch_after_loading(self):
-        """Monkey patch CogAgent to support beam search and `caption_start`."""
+        """
+        Monkey patch the model to support beam search and `caption_start`.
+        """
         cogagent_module = next(module
                                for module_name, module in sys.modules.items()
                                if 'modeling_cogagent' in module_name)
diff --git a/taggui/auto_captioning/models/cogvlm2.py b/taggui/auto_captioning/models/cogvlm2.py
@@ -19,8 +19,8 @@ class Cogvlm2(AutoCaptioningModel):
     def get_additional_error_message(self) -> str | None:
         if not importlib.util.find_spec('triton'):
             return ('This model requires the `triton` package, which is only '
-                    'available on Linux. Therefore, this model cannot be run '
-                    'on this system.')
+                    'available for Linux and Windows. Therefore, this model '
+                    'cannot be run on this system.')
         is_4_bit_model = 'int4' in self.model_id
         if is_4_bit_model:
             if self.device_setting == CaptionDevice.CPU:
diff --git a/taggui/auto_captioning/models/florence_2.py b/taggui/auto_captioning/models/florence_2.py
@@ -5,6 +5,7 @@
 
 
 class Florence2(AutoCaptioningModel):
+    use_safetensors = None
     transformers_model_class = AutoModelForCausalLM
     task_prompts = [
         '<CAPTION>',
@@ -30,6 +31,7 @@ def get_default_prompt(self) -> str:
 
 
 class Florence2Promptgen(Florence2):
+    use_safetensors = True
     task_prompts = [
         '<GENERATE_PROMPT>',
         '<CAPTION>',
diff --git a/taggui/auto_captioning/models/llava_llama_3.py b/taggui/auto_captioning/models/llava_llama_3.py
@@ -1,7 +1,15 @@
+from transformers import AutoConfig, AutoProcessor
+
 from auto_captioning.auto_captioning_model import AutoCaptioningModel
 
 
 class LlavaLlama3(AutoCaptioningModel):
+    def get_processor(self):
+        config = AutoConfig.from_pretrained(self.model_id)
+        patch_size = config.vision_config.patch_size
+        return AutoProcessor.from_pretrained(
+            self.model_id, trust_remote_code=True, patch_size=patch_size)
+
     @staticmethod
     def get_default_prompt() -> str:
         return 'Describe the image in one sentence.'
diff --git a/taggui/auto_captioning/models/moondream.py b/taggui/auto_captioning/models/moondream.py
@@ -9,6 +9,8 @@
 from auto_captioning.auto_captioning_model import AutoCaptioningModel
 from utils.image import Image
 
+MOONDREAM2_REVISION = '2024-08-26'
+
 
 class Moondream(AutoCaptioningModel):
     transformers_model_class = AutoModelForCausalLM
@@ -95,6 +97,11 @@ def get_processor(self):
         return AutoTokenizer.from_pretrained(self.model_id,
                                              trust_remote_code=True)
 
+    def get_model_load_arguments(self) -> dict:
+        arguments = super().get_model_load_arguments()
+        arguments['revision'] = MOONDREAM2_REVISION
+        return arguments
+
     def get_model_inputs(self, image_prompt: str, image: Image) -> dict:
         text = self.get_input_text(image_prompt)
         pil_image = self.load_image(image)
diff --git a/taggui/auto_captioning/models/xcomposer2.py b/taggui/auto_captioning/models/xcomposer2.py