1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20 
21 namespace android {
22 namespace renderscript {
23 
24 
25 class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
26 public:
27     void populateScript(Script *) override;
28 
29     ~RsdCpuScriptIntrinsicBlend() override;
30     RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
31 
32 protected:
33     static void kernel(const RsExpandKernelDriverInfo *info, uint32_t xstart,
34                        uint32_t xend, uint32_t outstep);
35 };
36 
37 } // namespace renderscript
38 } // namespace android
39 
40 
// Blend operation selectors. kernel() switches on these values (they arrive
// as the launch slot), so the numbering must not change.
enum {
    // Modes 0-11: all handled by the portable kernel.
    BLEND_CLEAR                = 0,
    BLEND_SRC                  = 1,
    BLEND_DST                  = 2,
    BLEND_SRC_OVER             = 3,
    BLEND_DST_OVER             = 4,
    BLEND_SRC_IN               = 5,
    BLEND_DST_IN               = 6,
    BLEND_SRC_OUT              = 7,
    BLEND_DST_OUT              = 8,
    BLEND_SRC_ATOP             = 9,
    BLEND_DST_ATOP             = 10,
    BLEND_XOR                  = 11,

    // Modes 12-43: only MULTIPLY, ADD and SUBTRACT are handled by the
    // portable kernel; every other mode logs an error and asserts.
    BLEND_NORMAL               = 12,
    BLEND_AVERAGE              = 13,
    BLEND_MULTIPLY             = 14,
    BLEND_SCREEN               = 15,
    BLEND_DARKEN               = 16,
    BLEND_LIGHTEN              = 17,
    BLEND_OVERLAY              = 18,
    BLEND_HARDLIGHT            = 19,
    BLEND_SOFTLIGHT            = 20,
    BLEND_DIFFERENCE           = 21,
    BLEND_NEGATION             = 22,
    BLEND_EXCLUSION            = 23,
    BLEND_COLOR_DODGE          = 24,
    BLEND_INVERSE_COLOR_DODGE  = 25,
    BLEND_SOFT_DODGE           = 26,
    BLEND_COLOR_BURN           = 27,
    BLEND_INVERSE_COLOR_BURN   = 28,
    BLEND_SOFT_BURN            = 29,
    BLEND_REFLECT              = 30,
    BLEND_GLOW                 = 31,
    BLEND_FREEZE               = 32,
    BLEND_HEAT                 = 33,
    BLEND_ADD                  = 34,
    BLEND_SUBTRACT             = 35,
    BLEND_STAMP                = 36,
    BLEND_RED                  = 37,
    BLEND_GREEN                = 38,
    BLEND_BLUE                 = 39,
    BLEND_HUE                  = 40,
    BLEND_SATURATION           = 41,
    BLEND_COLOR                = 42,
    BLEND_LUMINOSITY           = 43
};
88 
#if defined(ARCH_ARM_USE_INTRINSICS)
// SIMD blend routine defined outside this file. kernel() calls it first when
// gArchUseSIMD is set and returns immediately if the result is >= 0; a
// negative result makes kernel() fall through to the portable code.
extern "C" int rsdIntrinsicBlend_K(uchar4 *out, const uchar4 *in, int slot,
                                   uint32_t xstart, uint32_t xend);
#endif
93 
#if defined(ARCH_X86_HAVE_SSSE3)
// SSSE3 blend routines defined outside this file, one per accelerated mode.
// kernel() passes count8 = (pixel count) >> 3, i.e. the number of whole
// 8-pixel groups, and afterwards advances its pointers by count8 * 8 pixels.
void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
#endif
108 
109 namespace android {
110 namespace renderscript {
111 
kernel(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)112 void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info,
113                                         uint32_t xstart, uint32_t xend,
114                                         uint32_t outstep) {
115     // instep/outstep can be ignored--sizeof(uchar4) known at compile time
116     uchar4 *out = (uchar4 *)info->outPtr[0];
117     uchar4 *in = (uchar4 *)info->inPtr[0];
118     uint32_t x1 = xstart;
119     uint32_t x2 = xend;
120 
121 #if defined(ARCH_ARM_USE_INTRINSICS)
122     if (gArchUseSIMD) {
123         if (rsdIntrinsicBlend_K(out, in, info->slot, x1, x2) >= 0)
124             return;
125     }
126 #endif
127     switch (info->slot) {
128     case BLEND_CLEAR:
129         for (;x1 < x2; x1++, out++) {
130             *out = 0;
131         }
132         break;
133     case BLEND_SRC:
134         for (;x1 < x2; x1++, out++, in++) {
135           *out = *in;
136         }
137         break;
138     //BLEND_DST is a NOP
139     case BLEND_DST:
140         break;
141     case BLEND_SRC_OVER:
142     #if defined(ARCH_X86_HAVE_SSSE3)
143         if (gArchUseSIMD) {
144             if ((x1 + 8) < x2) {
145                 uint32_t len = (x2 - x1) >> 3;
146                 rsdIntrinsicBlendSrcOver_K(out, in, len);
147                 x1 += len << 3;
148                 out += len << 3;
149                 in += len << 3;
150             }
151         }
152     #endif
153         for (;x1 < x2; x1++, out++, in++) {
154             short4 in_s = convert_short4(*in);
155             short4 out_s = convert_short4(*out);
156             in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
157             *out = convert_uchar4(in_s);
158         }
159         break;
160     case BLEND_DST_OVER:
161     #if defined(ARCH_X86_HAVE_SSSE3)
162         if (gArchUseSIMD) {
163             if ((x1 + 8) < x2) {
164                 uint32_t len = (x2 - x1) >> 3;
165                 rsdIntrinsicBlendDstOver_K(out, in, len);
166                 x1 += len << 3;
167                 out += len << 3;
168                 in += len << 3;
169             }
170         }
171      #endif
172         for (;x1 < x2; x1++, out++, in++) {
173             short4 in_s = convert_short4(*in);
174             short4 out_s = convert_short4(*out);
175             in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
176             *out = convert_uchar4(in_s);
177         }
178         break;
179     case BLEND_SRC_IN:
180     #if defined(ARCH_X86_HAVE_SSSE3)
181         if (gArchUseSIMD) {
182             if ((x1 + 8) < x2) {
183                 uint32_t len = (x2 - x1) >> 3;
184                 rsdIntrinsicBlendSrcIn_K(out, in, len);
185                 x1 += len << 3;
186                 out += len << 3;
187                 in += len << 3;
188             }
189         }
190     #endif
191         for (;x1 < x2; x1++, out++, in++) {
192             short4 in_s = convert_short4(*in);
193             in_s = (in_s * out->w) >> (short4)8;
194             *out = convert_uchar4(in_s);
195         }
196         break;
197     case BLEND_DST_IN:
198     #if defined(ARCH_X86_HAVE_SSSE3)
199         if (gArchUseSIMD) {
200             if ((x1 + 8) < x2) {
201                 uint32_t len = (x2 - x1) >> 3;
202                 rsdIntrinsicBlendDstIn_K(out, in, len);
203                 x1 += len << 3;
204                 out += len << 3;
205                 in += len << 3;
206             }
207         }
208      #endif
209         for (;x1 < x2; x1++, out++, in++) {
210             short4 out_s = convert_short4(*out);
211             out_s = (out_s * in->w) >> (short4)8;
212             *out = convert_uchar4(out_s);
213         }
214         break;
215     case BLEND_SRC_OUT:
216     #if defined(ARCH_X86_HAVE_SSSE3)
217         if (gArchUseSIMD) {
218             if ((x1 + 8) < x2) {
219                 uint32_t len = (x2 - x1) >> 3;
220                 rsdIntrinsicBlendSrcOut_K(out, in, len);
221                 x1 += len << 3;
222                 out += len << 3;
223                 in += len << 3;
224             }
225         }
226     #endif
227         for (;x1 < x2; x1++, out++, in++) {
228             short4 in_s = convert_short4(*in);
229             in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
230             *out = convert_uchar4(in_s);
231         }
232         break;
233     case BLEND_DST_OUT:
234     #if defined(ARCH_X86_HAVE_SSSE3)
235         if (gArchUseSIMD) {
236             if ((x1 + 8) < x2) {
237                 uint32_t len = (x2 - x1) >> 3;
238                 rsdIntrinsicBlendDstOut_K(out, in, len);
239                 x1 += len << 3;
240                 out += len << 3;
241                 in += len << 3;
242             }
243         }
244     #endif
245         for (;x1 < x2; x1++, out++, in++) {
246             short4 out_s = convert_short4(*out);
247             out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
248             *out = convert_uchar4(out_s);
249         }
250         break;
251     case BLEND_SRC_ATOP:
252     #if defined(ARCH_X86_HAVE_SSSE3)
253         if (gArchUseSIMD) {
254             if ((x1 + 8) < x2) {
255                 uint32_t len = (x2 - x1) >> 3;
256                 rsdIntrinsicBlendSrcAtop_K(out, in, len);
257                 x1 += len << 3;
258                 out += len << 3;
259                 in += len << 3;
260             }
261         }
262     #endif
263         for (;x1 < x2; x1++, out++, in++) {
264             short4 in_s = convert_short4(*in);
265             short4 out_s = convert_short4(*out);
266             out_s.xyz = ((in_s.xyz * out_s.w) +
267               (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
268             *out = convert_uchar4(out_s);
269         }
270         break;
271     case BLEND_DST_ATOP:
272     #if defined(ARCH_X86_HAVE_SSSE3)
273         if (gArchUseSIMD) {
274             if ((x1 + 8) < x2) {
275                 uint32_t len = (x2 - x1) >> 3;
276                 rsdIntrinsicBlendDstAtop_K(out, in, len);
277                 x1 += len << 3;
278                 out += len << 3;
279                 in += len << 3;
280             }
281         }
282      #endif
283         for (;x1 < x2; x1++, out++, in++) {
284             short4 in_s = convert_short4(*in);
285             short4 out_s = convert_short4(*out);
286             out_s.xyz = ((out_s.xyz * in_s.w) +
287               (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
288             out_s.w = in_s.w;
289             *out = convert_uchar4(out_s);
290         }
291         break;
292     case BLEND_XOR:
293     #if defined(ARCH_X86_HAVE_SSSE3)
294         if (gArchUseSIMD) {
295             if ((x1 + 8) < x2) {
296                 uint32_t len = (x2 - x1) >> 3;
297                 rsdIntrinsicBlendXor_K(out, in, len);
298                 x1 += len << 3;
299                 out += len << 3;
300                 in += len << 3;
301             }
302         }
303     #endif
304         for (;x1 < x2; x1++, out++, in++) {
305             *out = *in ^ *out;
306         }
307         break;
308     case BLEND_NORMAL:
309         ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
310         rsAssert(false);
311         break;
312     case BLEND_AVERAGE:
313         ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
314         rsAssert(false);
315         break;
316     case BLEND_MULTIPLY:
317     #if defined(ARCH_X86_HAVE_SSSE3)
318         if (gArchUseSIMD) {
319             if ((x1 + 8) < x2) {
320                 uint32_t len = (x2 - x1) >> 3;
321                 rsdIntrinsicBlendMultiply_K(out, in, len);
322                 x1 += len << 3;
323                 out += len << 3;
324                 in += len << 3;
325             }
326         }
327     #endif
328         for (;x1 < x2; x1++, out++, in++) {
329           *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
330                                 >> (short4)8);
331         }
332         break;
333     case BLEND_SCREEN:
334         ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
335         rsAssert(false);
336         break;
337     case BLEND_DARKEN:
338         ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
339         rsAssert(false);
340         break;
341     case BLEND_LIGHTEN:
342         ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
343         rsAssert(false);
344         break;
345     case BLEND_OVERLAY:
346         ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
347         rsAssert(false);
348         break;
349     case BLEND_HARDLIGHT:
350         ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
351         rsAssert(false);
352         break;
353     case BLEND_SOFTLIGHT:
354         ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
355         rsAssert(false);
356         break;
357     case BLEND_DIFFERENCE:
358         ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
359         rsAssert(false);
360         break;
361     case BLEND_NEGATION:
362         ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
363         rsAssert(false);
364         break;
365     case BLEND_EXCLUSION:
366         ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
367         rsAssert(false);
368         break;
369     case BLEND_COLOR_DODGE:
370         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
371         rsAssert(false);
372         break;
373     case BLEND_INVERSE_COLOR_DODGE:
374         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
375         rsAssert(false);
376         break;
377     case BLEND_SOFT_DODGE:
378         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
379         rsAssert(false);
380         break;
381     case BLEND_COLOR_BURN:
382         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
383         rsAssert(false);
384         break;
385     case BLEND_INVERSE_COLOR_BURN:
386         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
387         rsAssert(false);
388         break;
389     case BLEND_SOFT_BURN:
390         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
391         rsAssert(false);
392         break;
393     case BLEND_REFLECT:
394         ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
395         rsAssert(false);
396         break;
397     case BLEND_GLOW:
398         ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
399         rsAssert(false);
400         break;
401     case BLEND_FREEZE:
402         ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
403         rsAssert(false);
404         break;
405     case BLEND_HEAT:
406         ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
407         rsAssert(false);
408         break;
409     case BLEND_ADD:
410     #if defined(ARCH_X86_HAVE_SSSE3)
411         if (gArchUseSIMD) {
412             if((x1 + 8) < x2) {
413                 uint32_t len = (x2 - x1) >> 3;
414                 rsdIntrinsicBlendAdd_K(out, in, len);
415                 x1 += len << 3;
416                 out += len << 3;
417                 in += len << 3;
418             }
419         }
420     #endif
421         for (;x1 < x2; x1++, out++, in++) {
422             uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
423                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
424             out->x = (oR + iR) > 255 ? 255 : oR + iR;
425             out->y = (oG + iG) > 255 ? 255 : oG + iG;
426             out->z = (oB + iB) > 255 ? 255 : oB + iB;
427             out->w = (oA + iA) > 255 ? 255 : oA + iA;
428         }
429         break;
430     case BLEND_SUBTRACT:
431     #if defined(ARCH_X86_HAVE_SSSE3)
432         if (gArchUseSIMD) {
433             if((x1 + 8) < x2) {
434                 uint32_t len = (x2 - x1) >> 3;
435                 rsdIntrinsicBlendSub_K(out, in, len);
436                 x1 += len << 3;
437                 out += len << 3;
438                 in += len << 3;
439             }
440         }
441     #endif
442         for (;x1 < x2; x1++, out++, in++) {
443             int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
444                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
445             out->x = (oR - iR) < 0 ? 0 : oR - iR;
446             out->y = (oG - iG) < 0 ? 0 : oG - iG;
447             out->z = (oB - iB) < 0 ? 0 : oB - iB;
448             out->w = (oA - iA) < 0 ? 0 : oA - iA;
449         }
450         break;
451     case BLEND_STAMP:
452         ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
453         rsAssert(false);
454         break;
455     case BLEND_RED:
456         ALOGE("Called unimplemented blend intrinsic BLEND_RED");
457         rsAssert(false);
458         break;
459     case BLEND_GREEN:
460         ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
461         rsAssert(false);
462         break;
463     case BLEND_BLUE:
464         ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
465         rsAssert(false);
466         break;
467     case BLEND_HUE:
468         ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
469         rsAssert(false);
470         break;
471     case BLEND_SATURATION:
472         ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
473         rsAssert(false);
474         break;
475     case BLEND_COLOR:
476         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
477         rsAssert(false);
478         break;
479     case BLEND_LUMINOSITY:
480         ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
481         rsAssert(false);
482         break;
483 
484     default:
485         ALOGE("Called unimplemented value %d", info->slot);
486         rsAssert(false);
487 
488     }
489 }
490 
491 
RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)492 RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
493                                                        const Script *s, const Element *e)
494             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
495 
496     mRootPtr = &kernel;
497 }
498 
~RsdCpuScriptIntrinsicBlend()499 RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
500 }
501 
populateScript(Script * s)502 void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
503     s->mHal.info.exportedVariableCount = 0;
504 }
505 
rsdIntrinsic_Blend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)506 RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
507                                       const Script *s, const Element *e) {
508     return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
509 }
510 
511 } // namespace renderscript
512 } // namespace android
513