Skip to content

Commit 34a7a68

Browse files
committed
loop unroll prepare
1 parent a1ff2c8 commit 34a7a68

File tree

1 file changed

+40
-40
lines changed

1 file changed

+40
-40
lines changed

HDR10Capture2019/MLConverter.cpp

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,10 @@ MLConverter::MLConverter(void)
7171
void
7272
MLConverter::Argb8bitToR8G8B8A8(const uint32_t* pFrom, uint32_t* pTo, const int width, const int height)
7373
{
74-
int pos = 0;
7574
for (int y = 0; y < height; ++y) {
7675
for (int x = 0; x < width; ++x) {
76+
const int pos = x + y * width;
77+
7778
// bmdFormat8BitARGB
7879
// ビッグエンディアンのA8R8G8B8 → リトルエンディアンのR8G8B8A8
7980
const uint32_t v = NtoHL(pFrom[pos]);
@@ -87,8 +88,6 @@ MLConverter::Argb8bitToR8G8B8A8(const uint32_t* pFrom, uint32_t* pTo, const int
8788
const uint32_t g = (v >> 8) & 0xff;
8889
const uint32_t b = (v >> 0) & 0xff;
8990
pTo[pos] = (a << 24) + (b << 16) + (g << 8) + r;
90-
91-
++pos;
9291
}
9392
}
9493
}
@@ -97,9 +96,10 @@ void
9796
MLConverter::Rgb10bitToRGBA8bit(const uint32_t *pFrom, uint32_t *pTo, const int width, const int height, const uint8_t alpha)
9897
{
9998
const uint32_t a = alpha;
100-
int pos = 0;
10199
for (int y = 0; y < height; ++y) {
102100
for (int x = 0; x < width; ++x) {
101+
const int pos = x + y * width;
102+
103103
// bmdFormat10BitRGB
104104
// ビッグエンディアンのX2R10G10B10 → リトルエンディアンのR8G8B8A8
105105
const uint32_t v = NtoHL(pFrom[pos]);
@@ -112,8 +112,6 @@ MLConverter::Rgb10bitToRGBA8bit(const uint32_t *pFrom, uint32_t *pTo, const int
112112
const uint32_t g = (v >> 12) & 0xff;
113113
const uint32_t b = (v >> 2) & 0xff;
114114
pTo[pos] = (a << 24) + (b << 16) + (g << 8) + r;
115-
116-
++pos;
117115
}
118116
}
119117
}
@@ -125,9 +123,10 @@ void
125123
MLConverter::Rgb10bitToR10G10B10A2(const uint32_t* pFrom, uint32_t* pTo, const int width, const int height, const uint8_t alpha)
126124
{
127125
const uint32_t a = (alpha >> 6) & 0x3;
128-
int pos = 0;
129126
for (int y = 0; y < height; ++y) {
130127
for (int x = 0; x < width; ++x) {
128+
const int pos = x + y * width;
129+
131130
// bmdFormat10BitRGB
132131
// ビッグエンディアンのX2R10G10B10 → リトルエンディアンのR10G10B10A2
133132
const uint32_t v = NtoHL(pFrom[pos]);
@@ -140,8 +139,6 @@ MLConverter::Rgb10bitToR10G10B10A2(const uint32_t* pFrom, uint32_t* pTo, const i
140139
const uint32_t g = (v >> 10) & 0x3ff;
141140
const uint32_t b = (v >> 0) & 0x3ff;
142141
pTo[pos] = (a << 30) + (b << 20) + (g << 10) + r;
143-
144-
++pos;
145142
}
146143
}
147144
}
@@ -156,10 +153,11 @@ MLConverter::R10G10B10A2ToExrHalfFloat(const uint32_t* pFrom, uint16_t* pTo, con
156153
// 0.0~1.0の範囲の値。
157154
half aF = (float)(alpha /255.0f);
158155

159-
int readPos = 0;
160-
int writePos = 0;
161156
for (int y = 0; y < height; ++y) {
162157
for (int x = 0; x < width; ++x) {
158+
const int readPos = x + y * width;
159+
const int writePos = readPos * 4;
160+
163161
const uint32_t v = pFrom[readPos];
164162
// v LSB
165163
// XXRRRRRR RRRRGGGG GGGGGGBB BBBBBBBB
@@ -199,9 +197,6 @@ MLConverter::R10G10B10A2ToExrHalfFloat(const uint32_t* pFrom, uint16_t* pTo, con
199197
assert(0);
200198
break;
201199
}
202-
203-
++readPos;
204-
writePos += 4;
205200
}
206201
}
207202
}
@@ -213,9 +208,11 @@ void
213208
MLConverter::R10G10B10A2ToR210(const uint32_t* pFrom, uint32_t* pTo, const int width, const int height, const uint8_t alpha)
214209
{
215210
const uint32_t a = (alpha >> 6) & 0x3;
216-
int pos = 0;
211+
217212
for (int y = 0; y < height; ++y) {
218213
for (int x = 0; x < width; ++x) {
214+
const int pos = x + y * width;
215+
219216
const uint32_t v = pFrom[pos];
220217
// v LSB
221218
// XXRRRRRR RRRRGGGG GGGGGGBB BBBBBBBB
@@ -230,7 +227,6 @@ MLConverter::R10G10B10A2ToR210(const uint32_t* pFrom, uint32_t* pTo, const int w
230227

231228
const uint32_t r210 = (a << 30) + (r << 20) + (g << 10) + b;
232229
pTo[pos] = HtoNL(r210);
233-
++pos;
234230
}
235231
}
236232
}
@@ -241,12 +237,16 @@ MLConverter::R10G10B10A2ToR210(const uint32_t* pFrom, uint32_t* pTo, const int w
241237
void
242238
MLConverter::Rgb12bitToR8G8B8A8(const uint32_t* pFrom, uint32_t* pTo, const int width, const int height, const uint8_t alpha)
243239
{
240+
/// widthは8で割り切れます。
241+
assert((width & 7) == 0);
242+
244243
const uint8_t a = alpha;
245-
int fromPos = 0;
246-
int toPos = 0;
247244
const int pixelCount = width * height;
248245

249-
while (toPos < pixelCount) {
246+
for (int i=0; i<pixelCount/8; ++i) {
247+
const int fromPos = i * 9;
248+
const int toPos = i * 8;
249+
250250
// 8pixelsのRGB3チャンネルのデータが36バイト=9個のuint32に入っている。
251251
// 3ch * 8px * 12bit / 8bit = 36バイト。
252252
const uint32_t w0 = NtoHL(pFrom[fromPos + 0]);
@@ -348,9 +348,6 @@ MLConverter::Rgb12bitToR8G8B8A8(const uint32_t* pFrom, uint32_t* pTo, const int
348348
pTo[toPos + 5] = (a << 24) + (b5 << 16) + (g5 << 8) + r5;
349349
pTo[toPos + 6] = (a << 24) + (b6 << 16) + (g6 << 8) + r6;
350350
pTo[toPos + 7] = (a << 24) + (b7 << 16) + (g7 << 8) + r7;
351-
352-
fromPos += 9;
353-
toPos += 8;
354351
}
355352
}
356353

@@ -360,12 +357,16 @@ MLConverter::Rgb12bitToR8G8B8A8(const uint32_t* pFrom, uint32_t* pTo, const int
360357
void
361358
MLConverter::Rgb12bitToR10G10B10A2(const uint32_t* pFrom, uint32_t* pTo, const int width, const int height, const uint8_t alpha)
362359
{
360+
/// widthは8で割り切れます。
361+
assert((width & 7) == 0);
362+
363363
const uint8_t a = (alpha>>6) & 0x3;
364-
int fromPos = 0;
365-
int toPos = 0;
366364
const int pixelCount = width * height;
367365

368-
while (toPos < pixelCount) {
366+
for (int i=0; i<pixelCount/8; ++i) {
367+
const int fromPos = i * 9;
368+
const int toPos = i * 8;
369+
369370
// 8pixelsのRGB3チャンネルのデータが36バイト=9個のuint32に入っている。
370371
// 3ch * 8px * 12bit / 8bit = 36バイト。
371372
const uint32_t w0 = NtoHL(pFrom[fromPos + 0]);
@@ -468,9 +469,6 @@ MLConverter::Rgb12bitToR10G10B10A2(const uint32_t* pFrom, uint32_t* pTo, const i
468469
pTo[toPos + 5] = (a << 30) + (b5 << 20) + (g5 << 10) + r5;
469470
pTo[toPos + 6] = (a << 30) + (b6 << 20) + (g6 << 10) + r6;
470471
pTo[toPos + 7] = (a << 30) + (b7 << 20) + (g7 << 10) + r7;
471-
472-
fromPos += 9;
473-
toPos += 8;
474472
}
475473
}
476474

@@ -480,12 +478,16 @@ MLConverter::Rgb12bitToR10G10B10A2(const uint32_t* pFrom, uint32_t* pTo, const i
480478
void
481479
MLConverter::Rgb12bitToR210(const uint32_t* pFrom, uint32_t* pTo, const int width, const int height)
482480
{
481+
/// widthは8で割り切れます。
482+
assert((width & 7) == 0);
483+
483484
const uint8_t a = 0x3;
484-
int fromPos = 0;
485-
int toPos = 0;
486485
const int pixelCount = width * height;
487486

488-
while (toPos < pixelCount) {
487+
for (int i=0; i<pixelCount/8; ++i) {
488+
const int fromPos = i * 9;
489+
const int toPos = i * 8;
490+
489491
// 8pixelsのRGB3チャンネルのデータが36バイト=9個のuint32に入っている。
490492
// 3ch * 8px * 12bit / 8bit = 36バイト。
491493
const uint32_t w0 = NtoHL(pFrom[fromPos + 0]);
@@ -601,9 +603,6 @@ MLConverter::Rgb12bitToR210(const uint32_t* pFrom, uint32_t* pTo, const int widt
601603
pTo[toPos + 5] = HtoNL(r210_5);
602604
pTo[toPos + 6] = HtoNL(r210_6);
603605
pTo[toPos + 7] = HtoNL(r210_7);
604-
605-
fromPos += 9;
606-
toPos += 8;
607606
}
608607
}
609608

@@ -614,12 +613,16 @@ MLConverter::Rgb12bitToR210(const uint32_t* pFrom, uint32_t* pTo, const int widt
614613
void
615614
MLConverter::Rgb12bitToR16G16B16A16(const uint32_t* pFrom, uint64_t* pTo, const int width, const int height, const uint8_t alpha)
616615
{
616+
/// widthは8で割り切れます。
617+
assert((width & 7) == 0);
618+
617619
const uint16_t a = alpha * 257; //< 255 * 257 = 65535
618-
int fromPos = 0;
619-
int toPos = 0;
620620
const int pixelCount = width * height;
621621

622-
while (toPos < pixelCount) {
622+
for (int i = 0; i < pixelCount / 8; ++i) {
623+
const int fromPos = i * 9;
624+
const int toPos = i * 8;
625+
623626
// 8pixelsのRGB3チャンネルのデータが36バイト=9個のuint32に入っている。
624627
// 3ch * 8px * 12bit / 8bit = 36バイト。
625628
const uint32_t w0 = NtoHL(pFrom[fromPos + 0]);
@@ -721,9 +724,6 @@ MLConverter::Rgb12bitToR16G16B16A16(const uint32_t* pFrom, uint64_t* pTo, const
721724
pTo[toPos + 5] = ((uint64_t)a << 48) + ((uint64_t)b5 << 32) + ((uint32_t)g5 << 16) + r5;
722725
pTo[toPos + 6] = ((uint64_t)a << 48) + ((uint64_t)b6 << 32) + ((uint32_t)g6 << 16) + r6;
723726
pTo[toPos + 7] = ((uint64_t)a << 48) + ((uint64_t)b7 << 32) + ((uint32_t)g7 << 16) + r7;
724-
725-
fromPos += 9;
726-
toPos += 8;
727727
}
728728

729729
}

0 commit comments

Comments
 (0)