2525
2626#include " packer.h"
2727#include " GrkImageMeta.h"
28+ #include " GrkImageSIMD.h"
2829
2930namespace grk
3031{
@@ -564,14 +565,22 @@ void clip(grk_image_comp* component, uint8_t precision)
564565 }
565566
566567 // Clip the data
567- for ( uint32_t j = 0 ; j < component-> h ; ++j )
568+ if constexpr (std::is_same_v<T, int32_t > )
568569 {
569- for (uint32_t i = 0 ; i < component->w ; ++i)
570+ hwy_clip_i32 (data, component->w , component->h , component->stride , (int32_t )minimum,
571+ (int32_t )maximum);
572+ }
573+ else
574+ {
575+ for (uint32_t j = 0 ; j < component->h ; ++j)
570576 {
571- data[index] = std::clamp<T>(data[index], minimum, maximum);
572- index++;
577+ for (uint32_t i = 0 ; i < component->w ; ++i)
578+ {
579+ data[index] = std::clamp<T>(data[index], minimum, maximum);
580+ index++;
581+ }
582+ index += stride_diff;
573583 }
574- index += stride_diff;
575584 }
576585 component->prec = precision;
577586}
@@ -678,45 +687,53 @@ bool GrkImage::color_esycc_to_rgb(void)
678687 auto yd = (T*)comps[0 ].data ;
679688 auto bd = (T*)comps[1 ].data ;
680689 auto rd = (T*)comps[2 ].data ;
681- for (uint32_t j = 0 ; j < h; ++j)
690+
691+ if constexpr (std::is_same_v<T, int32_t >)
682692 {
683- for (uint32_t i = 0 ; i < w; ++i)
693+ hwy_esycc_to_rgb_i32 (yd, bd, rd, w, h, comps[0 ].stride , max_value, flip_value, sign1, sign2);
694+ }
695+ else
696+ {
697+ for (uint32_t j = 0 ; j < h; ++j)
684698 {
685- T y = yd[dest_index];
686- T cb = bd[dest_index];
687- T cr = rd[dest_index];
688-
689- if (!sign1)
690- cb -= flip_value;
691- if (!sign2)
692- cr -= flip_value;
693-
694- T val = (T)(y - 0.0000368 * cb + 1.40199 * cr + 0.5 );
695-
696- if (val > max_value)
697- val = max_value;
698- else if (val < 0 )
699- val = 0 ;
700- yd[dest_index] = val;
701-
702- val = (T)(1.0003 * y - 0.344125 * cb - 0.7141128 * cr + 0.5 );
703-
704- if (val > max_value)
705- val = max_value;
706- else if (val < 0 )
707- val = 0 ;
708- bd[dest_index] = val;
709-
710- val = (T)(0.999823 * y + 1.77204 * cb - 0.000008 * cr + 0.5 );
711-
712- if (val > max_value)
713- val = max_value;
714- else if (val < 0 )
715- val = 0 ;
716- rd[dest_index] = val;
717- dest_index++;
699+ for (uint32_t i = 0 ; i < w; ++i)
700+ {
701+ T y = yd[dest_index];
702+ T cb = bd[dest_index];
703+ T cr = rd[dest_index];
704+
705+ if (!sign1)
706+ cb -= flip_value;
707+ if (!sign2)
708+ cr -= flip_value;
709+
710+ T val = (T)(y - 0.0000368 * cb + 1.40199 * cr + 0.5 );
711+
712+ if (val > max_value)
713+ val = max_value;
714+ else if (val < 0 )
715+ val = 0 ;
716+ yd[dest_index] = val;
717+
718+ val = (T)(1.0003 * y - 0.344125 * cb - 0.7141128 * cr + 0.5 );
719+
720+ if (val > max_value)
721+ val = max_value;
722+ else if (val < 0 )
723+ val = 0 ;
724+ bd[dest_index] = val;
725+
726+ val = (T)(0.999823 * y + 1.77204 * cb - 0.000008 * cr + 0.5 );
727+
728+ if (val > max_value)
729+ val = max_value;
730+ else if (val < 0 )
731+ val = 0 ;
732+ rd[dest_index] = val;
733+ dest_index++;
734+ }
735+ dest_index += stride_diff;
718736 }
719- dest_index += stride_diff;
720737 }
721738 color_space = GRK_CLRSPC_SRGB;
722739
@@ -853,23 +870,37 @@ void GrkImage::scaleComponent(grk_image_comp* component, uint8_t precision)
853870 if (component->prec < precision)
854871 {
855872 T scale = (T)(1ULL << diff);
856- size_t index = 0 ;
857- for (uint32_t j = 0 ; j < component->h ; ++j)
873+ if constexpr (std::is_same_v<T, int32_t >)
858874 {
859- for (uint32_t i = 0 ; i < component->w ; ++i)
860- data[index++] *= scale;
861- index += stride_diff;
875+ hwy_scale_mul_i32 (data, component->w , component->h , component->stride , scale);
876+ }
877+ else
878+ {
879+ size_t index = 0 ;
880+ for (uint32_t j = 0 ; j < component->h ; ++j)
881+ {
882+ for (uint32_t i = 0 ; i < component->w ; ++i)
883+ data[index++] *= scale;
884+ index += stride_diff;
885+ }
862886 }
863887 }
864888 else
865889 {
866890 T scale = (T)(1ULL << diff);
867- size_t index = 0 ;
868- for (uint32_t j = 0 ; j < component->h ; ++j)
891+ if constexpr (std::is_same_v<T, int32_t >)
869892 {
870- for (uint32_t i = 0 ; i < component->w ; ++i)
871- data[index++] /= scale;
872- index += stride_diff;
893+ hwy_scale_div_i32 (data, component->w , component->h , component->stride , scale);
894+ }
895+ else
896+ {
897+ size_t index = 0 ;
898+ for (uint32_t j = 0 ; j < component->h ; ++j)
899+ {
900+ for (uint32_t i = 0 ; i < component->w ; ++i)
901+ data[index++] /= scale;
902+ index += stride_diff;
903+ }
873904 }
874905 }
875906 component->prec = precision;
@@ -990,16 +1021,24 @@ bool GrkImage::sycc444_to_rgb(void)
9901021 dst->comps [1 ].data = nullptr ;
9911022 dst->comps [2 ].data = nullptr ;
9921023
993- for ( uint32_t j = 0 ; j < h; ++j )
1024+ if constexpr (std::is_same_v<T, int32_t > )
9941025 {
995- for (uint32_t i = 0 ; i < w; ++i)
996- sycc_to_rgb<T>(offset, upb, *y++, *cb++, *cr++, r++, g++, b++);
997- y += src_stride_diff;
998- cb += src_stride_diff;
999- cr += src_stride_diff;
1000- r += dst_stride_diff;
1001- g += dst_stride_diff;
1002- b += dst_stride_diff;
1026+ hwy_sycc444_to_rgb_i32 (y, cb, cr, r, g, b, w, h, comps[0 ].stride , dst->comps [0 ].stride ,
1027+ offset, upb);
1028+ }
1029+ else
1030+ {
1031+ for (uint32_t j = 0 ; j < h; ++j)
1032+ {
1033+ for (uint32_t i = 0 ; i < w; ++i)
1034+ sycc_to_rgb<T>(offset, upb, *y++, *cb++, *cr++, r++, g++, b++);
1035+ y += src_stride_diff;
1036+ cb += src_stride_diff;
1037+ cr += src_stride_diff;
1038+ r += dst_stride_diff;
1039+ g += dst_stride_diff;
1040+ b += dst_stride_diff;
1041+ }
10031042 }
10041043
10051044 all_components_data_free ();
@@ -1735,18 +1774,25 @@ bool GrkImage::applyICC(void)
17351774 auto g = (T*)comps[1 ].data ;
17361775 auto b = (T*)comps[2 ].data ;
17371776
1738- size_t src_index = 0 ;
1739- size_t dest_index = 0 ;
1740- for (uint32_t j = 0 ; j < h; ++j)
1777+ if constexpr (std::is_same_v<T, int32_t >)
17411778 {
1742- for (uint32_t i = 0 ; i < w; ++i)
1779+ hwy_planar_to_packed_8 (r, g, b, inbuf, w, h, comps[0 ].stride );
1780+ }
1781+ else
1782+ {
1783+ size_t src_index = 0 ;
1784+ size_t dest_index = 0 ;
1785+ for (uint32_t j = 0 ; j < h; ++j)
17431786 {
1744- inbuf[dest_index++] = (uint8_t )r[src_index];
1745- inbuf[dest_index++] = (uint8_t )g[src_index];
1746- inbuf[dest_index++] = (uint8_t )b[src_index];
1747- src_index++;
1787+ for (uint32_t i = 0 ; i < w; ++i)
1788+ {
1789+ inbuf[dest_index++] = (uint8_t )r[src_index];
1790+ inbuf[dest_index++] = (uint8_t )g[src_index];
1791+ inbuf[dest_index++] = (uint8_t )b[src_index];
1792+ src_index++;
1793+ }
1794+ src_index += stride_diff;
17481795 }
1749- src_index += stride_diff;
17501796 }
17511797
17521798 if (w > UINT32_MAX / 3 )
@@ -1759,18 +1805,25 @@ bool GrkImage::applyICC(void)
17591805
17601806 cmsDoTransformLineStride (transform, inbuf, outbuf, w, h, 3 * w, 3 * w, 0 , 0 );
17611807
1762- src_index = 0 ;
1763- dest_index = 0 ;
1764- for (uint32_t j = 0 ; j < h; ++j)
1808+ if constexpr (std::is_same_v<T, int32_t >)
17651809 {
1766- for (uint32_t i = 0 ; i < w; ++i)
1810+ hwy_packed_to_planar_8 (outbuf, r, g, b, w, h, comps[0 ].stride );
1811+ }
1812+ else
1813+ {
1814+ size_t src_index = 0 ;
1815+ size_t dest_index = 0 ;
1816+ for (uint32_t j = 0 ; j < h; ++j)
17671817 {
1768- r[dest_index] = (T)outbuf[src_index++];
1769- g[dest_index] = (T)outbuf[src_index++];
1770- b[dest_index] = (T)outbuf[src_index++];
1771- dest_index++;
1818+ for (uint32_t i = 0 ; i < w; ++i)
1819+ {
1820+ r[dest_index] = (T)outbuf[src_index++];
1821+ g[dest_index] = (T)outbuf[src_index++];
1822+ b[dest_index] = (T)outbuf[src_index++];
1823+ dest_index++;
1824+ }
1825+ dest_index += stride_diff;
17721826 }
1773- dest_index += stride_diff;
17741827 }
17751828 delete[] inbuf;
17761829 delete[] outbuf;
@@ -1790,18 +1843,25 @@ bool GrkImage::applyICC(void)
17901843 auto g = (T*)comps[1 ].data ;
17911844 auto b = (T*)comps[2 ].data ;
17921845
1793- size_t src_index = 0 ;
1794- size_t dest_index = 0 ;
1795- for (uint32_t j = 0 ; j < h; ++j)
1846+ if constexpr (std::is_same_v<T, int32_t >)
17961847 {
1797- for (uint32_t i = 0 ; i < w; ++i)
1848+ hwy_planar_to_packed_16 (r, g, b, inbuf, w, h, comps[0 ].stride );
1849+ }
1850+ else
1851+ {
1852+ size_t src_index = 0 ;
1853+ size_t dest_index = 0 ;
1854+ for (uint32_t j = 0 ; j < h; ++j)
17981855 {
1799- inbuf[dest_index++] = (uint16_t )r[src_index];
1800- inbuf[dest_index++] = (uint16_t )g[src_index];
1801- inbuf[dest_index++] = (uint16_t )b[src_index];
1802- src_index++;
1856+ for (uint32_t i = 0 ; i < w; ++i)
1857+ {
1858+ inbuf[dest_index++] = (uint16_t )r[src_index];
1859+ inbuf[dest_index++] = (uint16_t )g[src_index];
1860+ inbuf[dest_index++] = (uint16_t )b[src_index];
1861+ src_index++;
1862+ }
1863+ src_index += stride_diff;
18031864 }
1804- src_index += stride_diff;
18051865 }
18061866
18071867 if (w > UINT32_MAX / (3 * sizeof (uint16_t )))
@@ -1813,18 +1873,25 @@ bool GrkImage::applyICC(void)
18131873 }
18141874 cmsDoTransformLineStride (transform, inbuf, outbuf, w, h, 3 * w * sizeof (uint16_t ),
18151875 3 * w * sizeof (uint16_t ), 0 , 0 );
1816- src_index = 0 ;
1817- dest_index = 0 ;
1818- for (uint32_t j = 0 ; j < h; ++j)
1876+ if constexpr (std::is_same_v<T, int32_t >)
18191877 {
1820- for (uint32_t i = 0 ; i < w; ++i)
1878+ hwy_packed_to_planar_16 (outbuf, r, g, b, w, h, comps[0 ].stride );
1879+ }
1880+ else
1881+ {
1882+ size_t src_index = 0 ;
1883+ size_t dest_index = 0 ;
1884+ for (uint32_t j = 0 ; j < h; ++j)
18211885 {
1822- r[dest_index] = (T)outbuf[src_index++];
1823- g[dest_index] = (T)outbuf[src_index++];
1824- b[dest_index] = (T)outbuf[src_index++];
1825- dest_index++;
1886+ for (uint32_t i = 0 ; i < w; ++i)
1887+ {
1888+ r[dest_index] = (T)outbuf[src_index++];
1889+ g[dest_index] = (T)outbuf[src_index++];
1890+ b[dest_index] = (T)outbuf[src_index++];
1891+ dest_index++;
1892+ }
1893+ dest_index += stride_diff;
18261894 }
1827- dest_index += stride_diff;
18281895 }
18291896 delete[] inbuf;
18301897 delete[] outbuf;
0 commit comments