1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18 #include "rsCpuIntrinsic.h"
19 #include "rsCpuIntrinsicInlines.h"
20
21 namespace android {
22 namespace renderscript {
23
24
25 class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
26 public:
27 void populateScript(Script *) override;
28
29 ~RsdCpuScriptIntrinsicBlend() override;
30 RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
31
32 protected:
33 static void kernel(const RsExpandKernelDriverInfo *info, uint32_t xstart,
34 uint32_t xend, uint32_t outstep);
35 };
36
37 } // namespace renderscript
38 } // namespace android
39
40
// Blend mode identifiers, compared against the kernel's slot value.
// The numeric values are spelled out explicitly because they are matched
// against slot numbers supplied from outside this file.
enum {
    // Slots 0-11: every one of these has a working case in the kernel.
    BLEND_CLEAR               = 0,
    BLEND_SRC                 = 1,
    BLEND_DST                 = 2,
    BLEND_SRC_OVER            = 3,
    BLEND_DST_OVER            = 4,
    BLEND_SRC_IN              = 5,
    BLEND_DST_IN              = 6,
    BLEND_SRC_OUT             = 7,
    BLEND_DST_OUT             = 8,
    BLEND_SRC_ATOP            = 9,
    BLEND_DST_ATOP            = 10,
    BLEND_XOR                 = 11,

    // Slots 12-43: only MULTIPLY, ADD and SUBTRACT are implemented by the
    // kernel; the rest are reported as unimplemented when requested.
    BLEND_NORMAL              = 12,
    BLEND_AVERAGE             = 13,
    BLEND_MULTIPLY            = 14,
    BLEND_SCREEN              = 15,
    BLEND_DARKEN              = 16,
    BLEND_LIGHTEN             = 17,
    BLEND_OVERLAY             = 18,
    BLEND_HARDLIGHT           = 19,
    BLEND_SOFTLIGHT           = 20,
    BLEND_DIFFERENCE          = 21,
    BLEND_NEGATION            = 22,
    BLEND_EXCLUSION           = 23,
    BLEND_COLOR_DODGE         = 24,
    BLEND_INVERSE_COLOR_DODGE = 25,
    BLEND_SOFT_DODGE          = 26,
    BLEND_COLOR_BURN          = 27,
    BLEND_INVERSE_COLOR_BURN  = 28,
    BLEND_SOFT_BURN           = 29,
    BLEND_REFLECT             = 30,
    BLEND_GLOW                = 31,
    BLEND_FREEZE              = 32,
    BLEND_HEAT                = 33,
    BLEND_ADD                 = 34,
    BLEND_SUBTRACT            = 35,
    BLEND_STAMP               = 36,
    BLEND_RED                 = 37,
    BLEND_GREEN               = 38,
    BLEND_BLUE                = 39,
    BLEND_HUE                 = 40,
    BLEND_SATURATION          = 41,
    BLEND_COLOR               = 42,
    BLEND_LUMINOSITY          = 43
};
88
#if defined(ARCH_ARM_USE_INTRINSICS)
// ARM blend routine with C linkage, defined outside this file.
// The kernel treats a non-negative return value as "span fully handled" and
// otherwise falls through to the C++ implementation for the requested slot.
extern "C" int rsdIntrinsicBlend_K(uchar4 *out, const uchar4 *in, int slot,
                                   uint32_t xstart, uint32_t xend);
#endif
93
#if defined(ARCH_X86_HAVE_SSSE3)
// SSSE3 blend routines, defined outside this file. All share one signature:
// dst and src are the output and input pixel cursors, and count8 is the pixel
// count divided by 8 (callers advance their cursors by count8 * 8 afterwards).
void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
#endif
108
109 namespace android {
110 namespace renderscript {
111
kernel(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)112 void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info,
113 uint32_t xstart, uint32_t xend,
114 uint32_t outstep) {
115 // instep/outstep can be ignored--sizeof(uchar4) known at compile time
116 uchar4 *out = (uchar4 *)info->outPtr[0];
117 uchar4 *in = (uchar4 *)info->inPtr[0];
118 uint32_t x1 = xstart;
119 uint32_t x2 = xend;
120
121 #if defined(ARCH_ARM_USE_INTRINSICS)
122 if (gArchUseSIMD) {
123 if (rsdIntrinsicBlend_K(out, in, info->slot, x1, x2) >= 0)
124 return;
125 }
126 #endif
127 switch (info->slot) {
128 case BLEND_CLEAR:
129 for (;x1 < x2; x1++, out++) {
130 *out = 0;
131 }
132 break;
133 case BLEND_SRC:
134 for (;x1 < x2; x1++, out++, in++) {
135 *out = *in;
136 }
137 break;
138 //BLEND_DST is a NOP
139 case BLEND_DST:
140 break;
141 case BLEND_SRC_OVER:
142 #if defined(ARCH_X86_HAVE_SSSE3)
143 if (gArchUseSIMD) {
144 if ((x1 + 8) < x2) {
145 uint32_t len = (x2 - x1) >> 3;
146 rsdIntrinsicBlendSrcOver_K(out, in, len);
147 x1 += len << 3;
148 out += len << 3;
149 in += len << 3;
150 }
151 }
152 #endif
153 for (;x1 < x2; x1++, out++, in++) {
154 short4 in_s = convert_short4(*in);
155 short4 out_s = convert_short4(*out);
156 in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
157 *out = convert_uchar4(in_s);
158 }
159 break;
160 case BLEND_DST_OVER:
161 #if defined(ARCH_X86_HAVE_SSSE3)
162 if (gArchUseSIMD) {
163 if ((x1 + 8) < x2) {
164 uint32_t len = (x2 - x1) >> 3;
165 rsdIntrinsicBlendDstOver_K(out, in, len);
166 x1 += len << 3;
167 out += len << 3;
168 in += len << 3;
169 }
170 }
171 #endif
172 for (;x1 < x2; x1++, out++, in++) {
173 short4 in_s = convert_short4(*in);
174 short4 out_s = convert_short4(*out);
175 in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
176 *out = convert_uchar4(in_s);
177 }
178 break;
179 case BLEND_SRC_IN:
180 #if defined(ARCH_X86_HAVE_SSSE3)
181 if (gArchUseSIMD) {
182 if ((x1 + 8) < x2) {
183 uint32_t len = (x2 - x1) >> 3;
184 rsdIntrinsicBlendSrcIn_K(out, in, len);
185 x1 += len << 3;
186 out += len << 3;
187 in += len << 3;
188 }
189 }
190 #endif
191 for (;x1 < x2; x1++, out++, in++) {
192 short4 in_s = convert_short4(*in);
193 in_s = (in_s * out->w) >> (short4)8;
194 *out = convert_uchar4(in_s);
195 }
196 break;
197 case BLEND_DST_IN:
198 #if defined(ARCH_X86_HAVE_SSSE3)
199 if (gArchUseSIMD) {
200 if ((x1 + 8) < x2) {
201 uint32_t len = (x2 - x1) >> 3;
202 rsdIntrinsicBlendDstIn_K(out, in, len);
203 x1 += len << 3;
204 out += len << 3;
205 in += len << 3;
206 }
207 }
208 #endif
209 for (;x1 < x2; x1++, out++, in++) {
210 short4 out_s = convert_short4(*out);
211 out_s = (out_s * in->w) >> (short4)8;
212 *out = convert_uchar4(out_s);
213 }
214 break;
215 case BLEND_SRC_OUT:
216 #if defined(ARCH_X86_HAVE_SSSE3)
217 if (gArchUseSIMD) {
218 if ((x1 + 8) < x2) {
219 uint32_t len = (x2 - x1) >> 3;
220 rsdIntrinsicBlendSrcOut_K(out, in, len);
221 x1 += len << 3;
222 out += len << 3;
223 in += len << 3;
224 }
225 }
226 #endif
227 for (;x1 < x2; x1++, out++, in++) {
228 short4 in_s = convert_short4(*in);
229 in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
230 *out = convert_uchar4(in_s);
231 }
232 break;
233 case BLEND_DST_OUT:
234 #if defined(ARCH_X86_HAVE_SSSE3)
235 if (gArchUseSIMD) {
236 if ((x1 + 8) < x2) {
237 uint32_t len = (x2 - x1) >> 3;
238 rsdIntrinsicBlendDstOut_K(out, in, len);
239 x1 += len << 3;
240 out += len << 3;
241 in += len << 3;
242 }
243 }
244 #endif
245 for (;x1 < x2; x1++, out++, in++) {
246 short4 out_s = convert_short4(*out);
247 out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
248 *out = convert_uchar4(out_s);
249 }
250 break;
251 case BLEND_SRC_ATOP:
252 #if defined(ARCH_X86_HAVE_SSSE3)
253 if (gArchUseSIMD) {
254 if ((x1 + 8) < x2) {
255 uint32_t len = (x2 - x1) >> 3;
256 rsdIntrinsicBlendSrcAtop_K(out, in, len);
257 x1 += len << 3;
258 out += len << 3;
259 in += len << 3;
260 }
261 }
262 #endif
263 for (;x1 < x2; x1++, out++, in++) {
264 short4 in_s = convert_short4(*in);
265 short4 out_s = convert_short4(*out);
266 out_s.xyz = ((in_s.xyz * out_s.w) +
267 (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
268 *out = convert_uchar4(out_s);
269 }
270 break;
271 case BLEND_DST_ATOP:
272 #if defined(ARCH_X86_HAVE_SSSE3)
273 if (gArchUseSIMD) {
274 if ((x1 + 8) < x2) {
275 uint32_t len = (x2 - x1) >> 3;
276 rsdIntrinsicBlendDstAtop_K(out, in, len);
277 x1 += len << 3;
278 out += len << 3;
279 in += len << 3;
280 }
281 }
282 #endif
283 for (;x1 < x2; x1++, out++, in++) {
284 short4 in_s = convert_short4(*in);
285 short4 out_s = convert_short4(*out);
286 out_s.xyz = ((out_s.xyz * in_s.w) +
287 (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
288 out_s.w = in_s.w;
289 *out = convert_uchar4(out_s);
290 }
291 break;
292 case BLEND_XOR:
293 #if defined(ARCH_X86_HAVE_SSSE3)
294 if (gArchUseSIMD) {
295 if ((x1 + 8) < x2) {
296 uint32_t len = (x2 - x1) >> 3;
297 rsdIntrinsicBlendXor_K(out, in, len);
298 x1 += len << 3;
299 out += len << 3;
300 in += len << 3;
301 }
302 }
303 #endif
304 for (;x1 < x2; x1++, out++, in++) {
305 *out = *in ^ *out;
306 }
307 break;
308 case BLEND_NORMAL:
309 ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
310 rsAssert(false);
311 break;
312 case BLEND_AVERAGE:
313 ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
314 rsAssert(false);
315 break;
316 case BLEND_MULTIPLY:
317 #if defined(ARCH_X86_HAVE_SSSE3)
318 if (gArchUseSIMD) {
319 if ((x1 + 8) < x2) {
320 uint32_t len = (x2 - x1) >> 3;
321 rsdIntrinsicBlendMultiply_K(out, in, len);
322 x1 += len << 3;
323 out += len << 3;
324 in += len << 3;
325 }
326 }
327 #endif
328 for (;x1 < x2; x1++, out++, in++) {
329 *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
330 >> (short4)8);
331 }
332 break;
333 case BLEND_SCREEN:
334 ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
335 rsAssert(false);
336 break;
337 case BLEND_DARKEN:
338 ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
339 rsAssert(false);
340 break;
341 case BLEND_LIGHTEN:
342 ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
343 rsAssert(false);
344 break;
345 case BLEND_OVERLAY:
346 ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
347 rsAssert(false);
348 break;
349 case BLEND_HARDLIGHT:
350 ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
351 rsAssert(false);
352 break;
353 case BLEND_SOFTLIGHT:
354 ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
355 rsAssert(false);
356 break;
357 case BLEND_DIFFERENCE:
358 ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
359 rsAssert(false);
360 break;
361 case BLEND_NEGATION:
362 ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
363 rsAssert(false);
364 break;
365 case BLEND_EXCLUSION:
366 ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
367 rsAssert(false);
368 break;
369 case BLEND_COLOR_DODGE:
370 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
371 rsAssert(false);
372 break;
373 case BLEND_INVERSE_COLOR_DODGE:
374 ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
375 rsAssert(false);
376 break;
377 case BLEND_SOFT_DODGE:
378 ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
379 rsAssert(false);
380 break;
381 case BLEND_COLOR_BURN:
382 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
383 rsAssert(false);
384 break;
385 case BLEND_INVERSE_COLOR_BURN:
386 ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
387 rsAssert(false);
388 break;
389 case BLEND_SOFT_BURN:
390 ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
391 rsAssert(false);
392 break;
393 case BLEND_REFLECT:
394 ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
395 rsAssert(false);
396 break;
397 case BLEND_GLOW:
398 ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
399 rsAssert(false);
400 break;
401 case BLEND_FREEZE:
402 ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
403 rsAssert(false);
404 break;
405 case BLEND_HEAT:
406 ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
407 rsAssert(false);
408 break;
409 case BLEND_ADD:
410 #if defined(ARCH_X86_HAVE_SSSE3)
411 if (gArchUseSIMD) {
412 if((x1 + 8) < x2) {
413 uint32_t len = (x2 - x1) >> 3;
414 rsdIntrinsicBlendAdd_K(out, in, len);
415 x1 += len << 3;
416 out += len << 3;
417 in += len << 3;
418 }
419 }
420 #endif
421 for (;x1 < x2; x1++, out++, in++) {
422 uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
423 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
424 out->x = (oR + iR) > 255 ? 255 : oR + iR;
425 out->y = (oG + iG) > 255 ? 255 : oG + iG;
426 out->z = (oB + iB) > 255 ? 255 : oB + iB;
427 out->w = (oA + iA) > 255 ? 255 : oA + iA;
428 }
429 break;
430 case BLEND_SUBTRACT:
431 #if defined(ARCH_X86_HAVE_SSSE3)
432 if (gArchUseSIMD) {
433 if((x1 + 8) < x2) {
434 uint32_t len = (x2 - x1) >> 3;
435 rsdIntrinsicBlendSub_K(out, in, len);
436 x1 += len << 3;
437 out += len << 3;
438 in += len << 3;
439 }
440 }
441 #endif
442 for (;x1 < x2; x1++, out++, in++) {
443 int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
444 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
445 out->x = (oR - iR) < 0 ? 0 : oR - iR;
446 out->y = (oG - iG) < 0 ? 0 : oG - iG;
447 out->z = (oB - iB) < 0 ? 0 : oB - iB;
448 out->w = (oA - iA) < 0 ? 0 : oA - iA;
449 }
450 break;
451 case BLEND_STAMP:
452 ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
453 rsAssert(false);
454 break;
455 case BLEND_RED:
456 ALOGE("Called unimplemented blend intrinsic BLEND_RED");
457 rsAssert(false);
458 break;
459 case BLEND_GREEN:
460 ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
461 rsAssert(false);
462 break;
463 case BLEND_BLUE:
464 ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
465 rsAssert(false);
466 break;
467 case BLEND_HUE:
468 ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
469 rsAssert(false);
470 break;
471 case BLEND_SATURATION:
472 ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
473 rsAssert(false);
474 break;
475 case BLEND_COLOR:
476 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
477 rsAssert(false);
478 break;
479 case BLEND_LUMINOSITY:
480 ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
481 rsAssert(false);
482 break;
483
484 default:
485 ALOGE("Called unimplemented value %d", info->slot);
486 rsAssert(false);
487
488 }
489 }
490
491
RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)492 RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
493 const Script *s, const Element *e)
494 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
495
496 mRootPtr = &kernel;
497 }
498
~RsdCpuScriptIntrinsicBlend()499 RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
500 }
501
populateScript(Script * s)502 void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
503 s->mHal.info.exportedVariableCount = 0;
504 }
505
rsdIntrinsic_Blend(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)506 RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
507 const Script *s, const Element *e) {
508 return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
509 }
510
511 } // namespace renderscript
512 } // namespace android
513