Skip to content

Commit 1629010

Browse files
committed
improvements and fixes
1 parent 401299c commit 1629010

File tree

7 files changed

+671
-710
lines changed

7 files changed

+671
-710
lines changed

JxlCoder.podspec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Pod::Spec.new do |s|
22
s.name = 'JxlCoder'
3-
s.version = '1.2.5'
3+
s.version = '1.2.6'
44
s.summary = 'JXL coder for iOS and MacOS'
55
s.description = 'Provides support for JXL files in iOS and MacOS'
66
s.homepage = 'https://github.com/awxkee/jxl-coder-swift'

Sources/jxlc/JxlWorker.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ bool DecodeBasicInfo(const uint8_t *jxl, size_t size, size_t *xsize, size_t *ysi
234234
bool EncodeJxlOneshot(const std::vector<uint8_t> &pixels, const uint32_t xsize,
235235
const uint32_t ysize, std::vector<uint8_t> *compressed,
236236
JxlPixelType colorspace, JxlCompressionOption compression_option,
237-
float compression_distance, int effort) {
237+
float compressionDistance, int effort) {
238238
auto enc = JxlEncoderMake(/*memory_manager=*/nullptr);
239239
auto runner = JxlThreadParallelRunnerMake(
240240
/*memory_manager=*/nullptr,
@@ -311,10 +311,18 @@ bool EncodeJxlOneshot(const std::vector<uint8_t> &pixels, const uint32_t xsize,
311311
}
312312

313313
if (JXL_ENC_SUCCESS !=
314-
JxlEncoderSetFrameDistance(frameSettings, compression_distance)) {
314+
JxlEncoderSetFrameDistance(frameSettings, compressionDistance)) {
315315
return false;
316316
}
317317

318+
if (colorspace == rgba) {
319+
if (JXL_ENC_SUCCESS !=
320+
JxlEncoderSetExtraChannelDistance(frameSettings, 0, compressionDistance)) {
321+
return false;
322+
}
323+
}
324+
325+
318326
if (JxlEncoderFrameSettingsSetOption(frameSettings,
319327
JXL_ENC_FRAME_SETTING_EFFORT, effort) != JXL_ENC_SUCCESS) {
320328
return false;

Sources/jxlc/RgbaScaler.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@
3232
#import "XScaler.hpp"
3333

3434
typedef NS_ENUM(NSInteger, JxlIPixelFormat) {
35-
kU8 NS_SWIFT_NAME(Uniform8),
36-
kF16 NS_SWIFT_NAME(Float16)
35+
kU8 NS_SWIFT_NAME(uniform8),
36+
kF16 NS_SWIFT_NAME(float16)
3737
};
3838

3939
@interface RgbaScaler : NSObject

Sources/jxlc/RgbaScaler.mm

Lines changed: 0 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,7 @@
3333

3434
@implementation RgbaScaler
3535

36-
//static bool API_AVAILABLE(macos(13.0), ios(16.0), watchos(9.0), tvos(16.0))
3736
static bool scaleF16iOS16(std::vector<uint8_t> &src, int components, int width, int height, int newWidth, int newHeight, XSampler sampler) {
38-
// if (components != 4) {
3937
std::vector<uint8_t> dst(components * sizeof(uint16_t) * newWidth * newHeight);
4038

4139
scaleImageFloat16(reinterpret_cast<uint16_t*>(src.data()),
@@ -44,109 +42,9 @@ static bool scaleF16iOS16(std::vector<uint8_t> &src, int components, int width,
4442

4543
src = dst;
4644
return true;
47-
// }
48-
//
49-
// std::vector<uint8_t> dst(4 * sizeof(uint16_t) * newWidth * newHeight);
50-
//
51-
// vImage_Buffer srcBuffer = {
52-
// .data = (void*)src.data(),
53-
// .width = static_cast<vImagePixelCount>(width),
54-
// .height = static_cast<vImagePixelCount>(height),
55-
// .rowBytes = width * 4 * sizeof(uint16_t)
56-
// };
57-
//
58-
// vImage_Buffer dstBuffer = {
59-
// .data = dst.data(),
60-
// .width = static_cast<vImagePixelCount>(newWidth),
61-
// .height = static_cast<vImagePixelCount>(newHeight),
62-
// .rowBytes = newWidth * 4 * sizeof(uint16_t)
63-
// };
64-
//
65-
// auto result = vImageScale_ARGB16F(&srcBuffer, &dstBuffer, nullptr, kvImageUseFP16Accumulator);
66-
// if (result != kvImageNoError) {
67-
// return false;
68-
// }
69-
// src = dst;
70-
// return true;
71-
}
72-
73-
static bool scaleF16iOSPre16(std::vector<uint8_t> &src, int components, int width, int height, int newWidth, int newHeight, XSampler sampler) {
74-
75-
vImage_Buffer srcBuffer = {
76-
.data = (void*)src.data(),
77-
.width = static_cast<vImagePixelCount>(width * components),
78-
.height = static_cast<vImagePixelCount>(height),
79-
.rowBytes = width * components * sizeof(uint16_t)
80-
};
81-
82-
vImage_Buffer dstBuffer = {
83-
.data = src.data(),
84-
.width = static_cast<vImagePixelCount>(width * components),
85-
.height = static_cast<vImagePixelCount>(height),
86-
.rowBytes = width * components * sizeof(uint16_t)
87-
};
88-
vImage_Error vEerror = vImageConvert_16Fto16U(&srcBuffer, &dstBuffer, kvImageNoFlags);
89-
if (vEerror != kvImageNoError) {
90-
return false;
91-
}
92-
93-
if (components == 4) {
94-
95-
std::vector<uint8_t> dst(components * sizeof(uint16_t) * newWidth * newHeight);
96-
97-
vImage_Buffer srcBuffer = {
98-
.data = (void*)src.data(),
99-
.width = static_cast<vImagePixelCount>(width),
100-
.height = static_cast<vImagePixelCount>(height),
101-
.rowBytes = width * 4 * sizeof(uint16_t)
102-
};
103-
104-
vImage_Buffer dstBuffer = {
105-
.data = dst.data(),
106-
.width = static_cast<vImagePixelCount>(newWidth),
107-
.height = static_cast<vImagePixelCount>(newHeight),
108-
.rowBytes = newWidth * 4 * sizeof(uint16_t)
109-
};
110-
111-
auto result = vImageScale_ARGB16U(&srcBuffer, &dstBuffer, nullptr, kvImageNoFlags);
112-
if (result != kvImageNoError) {
113-
return false;
114-
}
115-
src = dst;
116-
} else {
117-
std::vector<uint8_t> dst(components * sizeof(uint16_t) * newWidth * newHeight);
118-
119-
scaleImageU16(reinterpret_cast<uint16_t*>(src.data()),
120-
components * sizeof(uint16_t) * width, width, height, reinterpret_cast<uint16_t*>(dst.data()),
121-
components * sizeof(uint16_t) * newWidth, newWidth, newHeight, components, 16, sampler);
122-
src = dst;
123-
}
124-
125-
{
126-
vImage_Buffer srcBuffer = {
127-
.data = (void*)src.data(),
128-
.width = static_cast<vImagePixelCount>(newWidth * components),
129-
.height = static_cast<vImagePixelCount>(newHeight),
130-
.rowBytes = newWidth * components * sizeof(uint16_t)
131-
};
132-
133-
vImage_Buffer dstBuffer = {
134-
.data = (void*)src.data(),
135-
.width = static_cast<vImagePixelCount>(newWidth * components),
136-
.height = static_cast<vImagePixelCount>(newHeight),
137-
.rowBytes = newWidth * components * sizeof(uint16_t)
138-
};
139-
const float scale = 1.0f / float((1 << 16) - 1);
140-
vImage_Error vEerror = vImageConvert_16Uto16F(&srcBuffer, &dstBuffer, kvImageNoFlags);
141-
if (vEerror != kvImageNoError) {
142-
return false;
143-
}
144-
}
145-
return true;
14645
}
14746

14847
+ (bool)scaleRGB8:(std::vector<uint8_t> &)src components:(int)components width:(int)width height:(int)height newWidth:(int)newWidth newHeight:(int)newHeight sampler:(XSampler)sampler {
149-
// if (components != 4) {
15048
std::vector<uint8_t> dst(components * sizeof(uint8_t) * newWidth * newHeight);
15149

15250
scaleImageU8(reinterpret_cast<uint8_t*>(src.data()),
@@ -155,36 +53,10 @@ + (bool)scaleRGB8:(std::vector<uint8_t> &)src components:(int)components width:(
15553
src = dst;
15654

15755
return true;
158-
// }
159-
//
160-
// std::vector<uint8_t> dst(4 * sizeof(uint8_t) * newWidth * newHeight);
161-
//
162-
// vImage_Buffer srcBuffer = {
163-
// .data = (void*)src.data(),
164-
// .width = static_cast<vImagePixelCount>(width),
165-
// .height = static_cast<vImagePixelCount>(height),
166-
// .rowBytes = width * 4 * sizeof(uint8_t)
167-
// };
168-
//
169-
// vImage_Buffer dstBuffer = {
170-
// .data = dst.data(),
171-
// .width = static_cast<vImagePixelCount>(newWidth),
172-
// .height = static_cast<vImagePixelCount>(newHeight),
173-
// .rowBytes = newWidth * 4 * sizeof(uint8_t)
174-
// };
175-
//
176-
// auto result = vImageScale_ARGB8888(&srcBuffer, &dstBuffer, nullptr, kvImageNoFlags);
177-
// if (result != kvImageNoError) {
178-
// return false;
179-
// }
180-
//
181-
// src = dst;
182-
// return true;
18356
}
18457

18558
+(bool) scaleData:(std::vector<uint8_t>&)src width:(int)width height:(int)height newWidth:(int)newWidth newHeight:(int)newHeight components:(int)components pixelFormat:(JxlIPixelFormat)pixelFormat sampler:(XSampler)sampler {
18659

187-
//Flipping not supported
18860
if (newWidth < 0 || newHeight < 0) {
18961
return false;
19062
}
@@ -194,14 +66,8 @@ +(bool) scaleData:(std::vector<uint8_t>&)src width:(int)width height:(int)height
19466
return [self scaleRGB8:src components:components width:width height:height newWidth:newWidth newHeight:newHeight sampler:sampler];
19567
} else if (pixelFormat == kF16) {
19668
return scaleF16iOS16(src, components, width, height, newWidth, newHeight, sampler);
197-
// if (@available(iOS 16.0, macOS 13.0, *)) {
198-
// return scaleF16iOS16(src, components, width, height, newWidth, newHeight, sampler);
199-
// } else {
200-
// return scaleF16iOSPre16(src, components, width, height, newWidth, newHeight, sampler);
201-
// }
20269
}
20370
} catch (const std::bad_alloc& e) {
204-
// Memory allocation has failed
20571
return false;
20672
}
20773
return false;

Sources/jxlc/ScaleInterpolator.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@
3131
using namespace half_float;
3232
using namespace std;
3333

34+
#if defined(__clang__)
35+
#pragma clang fp contract(fast) exceptions(ignore) reassociate(on)
36+
#endif
37+
3438
// P Found using maxima
3539
//
3640
// y(x) := 4 * x * (%pi-x) / (%pi^2) ;
@@ -206,7 +210,8 @@ inline T sinc(T x) {
206210
template <typename T>
207211
inline T LanczosWindow(T x, const T a) {
208212
if (abs(x) < a) {
209-
return sinc(T(M_PI) * x) * sinc(T(M_PI) * x / a);
213+
T rv = T(M_PI) * x;
214+
return sinc(rv) * sinc(rv / a);
210215
}
211216
return T(0.0);
212217
}

Sources/jxlc/ScaleInterpolator.h

Lines changed: 67 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -60,17 +60,68 @@ T CubicBSpline(T t);
6060
#if __arm64__
6161
#include <arm_neon.h>
6262

63-
inline float32x4_t Cos(const float32x4_t d) {
63+
__attribute__((always_inline))
64+
static inline float32x4_t Cos(const float32x4_t d) {
6465

65-
constexpr float C0 = 0.99940307;
66-
constexpr float C1 = -0.49558072;
67-
constexpr float C2 = 0.03679168;
66+
const float32x4_t C0 = vdupq_n_f32(0.99940307);
67+
const float32x4_t C1 = vdupq_n_f32(-0.49558072);
68+
const float32x4_t C2 = vdupq_n_f32(0.03679168);
6869
constexpr float C3 = -0.00434102;
6970
float32x4_t x2 = vmulq_f32(d, d);
70-
return vmlaq_f32(vdupq_n_f32(C0), x2, vmlaq_f32(vdupq_n_f32(C1), x2, vmlaq_f32(vdupq_n_f32(C2), x2, vdupq_n_f32(C3))));
71+
return vmlaq_f32(C0, x2, vmlaq_f32(C1, x2, vmlaq_n_f32(C2, x2, C3)));
7172
}
7273

73-
inline float32x4_t CubicInterpolation(const float32x4_t d,
74+
// Four-lane polynomial sine approximation.
//
// With y = 4 * v * (pi - v) / pi^2 (a parabola that is 0 at v = 0 and v = pi
// and 1 at v = pi / 2), the result is
//     y * ((1 - P - Q) + y * (P + y * Q))
// i.e. a cubic in y whose coefficients sum to 1.
//
// NOTE(review): the parabola only resembles sin(v) on [0, pi]; no range
// reduction is done here, so callers presumably pass arguments in that
// interval -- confirm against the call sites.
__attribute__((always_inline))
static inline float32x4_t FastSin(const float32x4_t v) {
    constexpr float kParabolaScale = 4.0f/(M_PI*M_PI);
    const float32x4_t quadWeight = vdupq_n_f32(0.1952403377008734f);    // P
    const float32x4_t cubicWeight = vdupq_n_f32(0.01915214119105392f);  // Q
    const float32x4_t piLanes = vdupq_n_f32(M_PI);

    // y = (v * 4 / pi^2) * (pi - v)
    const float32x4_t scaledInput = vmulq_n_f32(v, kParabolaScale);
    const float32x4_t y = vmulq_f32(scaledInput, vsubq_f32(piLanes, v));

    // Linear coefficient (1 - P) - Q, so the polynomial evaluates to 1 at y = 1.
    const float32x4_t unit = vdupq_n_f32(1.0f);
    const float32x4_t linearWeight = vsubq_f32(vsubq_f32(unit, quadWeight), cubicWeight);

    // Horner evaluation: linearWeight + y * (P + y * Q), then one more factor of y.
    const float32x4_t inner = vmlaq_f32(quadWeight, y, cubicWeight);
    const float32x4_t poly = vmlaq_f32(linearWeight, y, inner);
    return vmulq_f32(y, poly);
}
86+
87+
// Four-lane sinc: FastSin(v) / v per lane, with lanes where v == 0 set to 1
// (the limit of sin(v) / v as v -> 0).
//
// The division is done by multiplying with vrecpeq_f32, which is only a
// reciprocal estimate, so the result carries that estimate's reduced precision.
//
// NOTE(review): accuracy for lanes outside the range FastSin approximates
// well is not addressed here -- confirm the expected argument range.
__attribute__((always_inline))
static inline float32x4_t Sinc(const float32x4_t v) {
    const float32x4_t ones = vdupq_n_f32(1.0f);
    // All-ones bit mask in every lane whose input is exactly zero.
    const uint32x4_t isZero = vceqq_f32(v, vdupq_n_f32(0.0f));
    // Replace zero lanes by 1 before taking the reciprocal so that no lane
    // evaluates 0 * (1 / 0) and produces NaN.
    const float32x4_t safe = vbslq_f32(isZero, ones, v);
    const float32x4_t ratio = vmulq_f32(FastSin(safe), vrecpeq_f32(safe));
    // vbslq_f32 takes the second operand where the mask is set: zero lanes
    // get 1, every other lane keeps the computed ratio.
    return vbslq_f32(isZero, ones, ratio);
}
99+
100+
// Four-lane Lanczos kernel with support `a`:
//     L(v) = Sinc(pi * v) * Sinc(pi * v / a)   for |v| < a
//     L(v) = 0                                 otherwise
// which is the same formula the scalar LanczosWindow template evaluates.
//
// 1 / a is obtained with vrecpeq_f32 (a reciprocal estimate), so the second
// sinc argument is approximate.
__attribute__((always_inline))
static inline float32x4_t LanczosWindow(const float32x4_t v, const float a) {
    const float32x4_t fullLength = vdupq_n_f32(a);
    const float32x4_t invLength = vrecpeq_f32(fullLength);
    // Mask is set in lanes that fall strictly inside the kernel support.
    const uint32x4_t inside = vcltq_f32(vabsq_f32(v), fullLength);
    const float32x4_t rv = vmulq_n_f32(v, M_PI);
    // Both factors are taken at pi * v; the second one is stretched by 1 / a.
    const float32x4_t kernel = vmulq_f32(Sinc(rv), Sinc(vmulq_f32(rv, invLength)));
    // vbslq_f32 picks its second operand where the mask is set, so the kernel
    // value is kept inside the support and zero is used outside it.
    return vbslq_f32(inside, kernel, vdupq_n_f32(0.0f));
}
111+
112+
// Four-lane Hann-style window of total width `length`:
//     w(d) = Cos(pi * d / length)^2 / length   for |d| < length / 2
//     w(d) = 0                                 otherwise
//
// 1 / length comes from vrecpeq_f32 (a reciprocal estimate).
//
// NOTE(review): the 1 / length scale factor is kept exactly as written;
// confirm it matches the normalisation of the scalar HannWindow.
__attribute__((always_inline))
static inline float32x4_t HannWindow(const float32x4_t d, const float length) {
    const float32x4_t invLength = vrecpeq_f32(vdupq_n_f32(length));
    const float32x4_t halfLength = vdupq_n_f32(length / 2);
    // Mask is set in lanes that fall strictly inside the window.
    const uint32x4_t inside = vcltq_f32(vabsq_f32(d), halfLength);
    float32x4_t cx = Cos(vmulq_f32(vmulq_n_f32(d, M_PI), invLength));
    cx = vmulq_f32(vmulq_f32(cx, cx), invLength);
    // vbslq_f32 picks its second operand where the mask is set: keep the
    // window value inside the support, force zero outside it.
    return vbslq_f32(inside, cx, vdupq_n_f32(0.0f));
}
122+
123+
__attribute__((always_inline))
124+
static inline float32x4_t CubicInterpolation(const float32x4_t d,
74125
const float32x4_t p0, const float32x4_t p1, const float32x4_t p2, const float32x4_t p3,
75126
const float C, const float B) {
76127

@@ -92,17 +143,8 @@ inline float32x4_t CubicInterpolation(const float32x4_t d,
92143
return result;
93144
}
94145

95-
inline float32x4_t HannWindow(const float32x4_t d, const float length) {
96-
float32x4_t x = vabsq_f32(d);
97-
uint32x4_t mask = vcltq_f32(x, vdupq_n_f32(length / 2));
98-
99-
x = Cos(vdivq_f32(vmulq_f32(vdupq_n_f32(M_PI), x), vdupq_n_f32(length)));
100-
x = vmulq_n_f32(vmulq_f32(x, x), length / 2);
101-
x = vbslq_f32(mask, vdupq_n_f32(0), x);
102-
return x;
103-
}
104-
105-
inline float32x4_t CatmullRom(const float32x4_t d,
146+
__attribute__((always_inline))
147+
static inline float32x4_t CatmullRom(const float32x4_t d,
106148
const float32x4_t p0, const float32x4_t p1, const float32x4_t p2, const float32x4_t p3) {
107149

108150
float32x4_t x = vabsq_f32(d);
@@ -119,7 +161,8 @@ inline float32x4_t CatmullRom(const float32x4_t d,
119161
return result;
120162
}
121163

122-
inline float32x4_t SimpleCubic(const float32x4_t d,
164+
__attribute__((always_inline))
165+
static inline float32x4_t SimpleCubic(const float32x4_t d,
123166
const float32x4_t p0, const float32x4_t p1, const float32x4_t p2, const float32x4_t p3) {
124167

125168
float32x4_t duplet = vmulq_f32(d, d);
@@ -139,17 +182,20 @@ inline float32x4_t SimpleCubic(const float32x4_t d,
139182
return result;
140183
}
141184

142-
inline float32x4_t MitchellNetravali(float32x4_t d,
185+
__attribute__((always_inline))
186+
static inline float32x4_t MitchellNetravali(float32x4_t d,
143187
float32x4_t p0, const float32x4_t p1, const float32x4_t p2, const float32x4_t p3) {
144188
return CubicInterpolation(d, p0, p1, p2, p3, 1.0f/3.0f, 1.0f/3.0f);
145189
}
146190

147-
inline float32x4_t CubicHermite(const float32x4_t d,
191+
__attribute__((always_inline))
192+
static inline float32x4_t CubicHermite(const float32x4_t d,
148193
const float32x4_t p0, const float32x4_t p1, const float32x4_t p2, const float32x4_t p3) {
149194
return CubicInterpolation(d, p0, p1, p2, p3, 0.0f, 0.0f);
150195
}
151196

152-
inline float32x4_t CubicBSpline(const float32x4_t d,
197+
__attribute__((always_inline))
198+
static inline float32x4_t CubicBSpline(const float32x4_t d,
153199
const float32x4_t p0, const float32x4_t p1, const float32x4_t p2, const float32x4_t p3) {
154200
return CubicInterpolation(d, p0, p1, p2, p3, 0.0f, 1.0f);
155201
}

0 commit comments

Comments
 (0)