Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -92,25 +92,34 @@ public void printHorizontalBorder() {
String body = "\t\t\tint indexDest = output.startIndex + y*output.stride;\n" +
"\t\t\tint j = input.startIndex + y*input.stride;\n" +
"\n" +
"\t\t\tfor (int i = 0; i < offset; i++) {\n" +
"\t\t\t\tint jEnd = j + i + length - offset;\n" +
"\t\t\t\t" + sumType + " total = 0;\n" +
"\t\t\t" + sumType + " total = 0;\n" +
"\t\t\tint count = length - offset;\n" +
"\t\t\tint jEnd = j + count;\n" +
"\t\t\tif (offset > 0) {\n" +
"\t\t\t\tfor (int indexSrc = j; indexSrc < jEnd; indexSrc++) {\n" +
"\t\t\t\t\ttotal += dataSrc[indexSrc]" + bitWise + ";\n" +
"\t\t\t\t}\n" +
"\t\t\t\tint count = jEnd - j;\n" +
"\t\t\t\tdataDst[indexDest++] = " + divide + ";\n" +
"\t\t\t}\n" +
"\n" +
"\t\t\tint jEnd = j + width;\n" +
"\t\t\tj += width - (offset + offsetR);\n" +
"\t\t\tindexDest += width - (offset + offsetR);\n" +
"\t\t\tfor (int i = 0; i < offsetR; i++) {\n" +
"\t\t\t\t" + sumType + " total = 0;\n" +
"\t\t\t\tfor (int indexSrc = j + i; indexSrc < jEnd; indexSrc++) {\n" +
"\t\t\twhile (++count < length) {\n" +
"\t\t\t\ttotal += dataSrc[jEnd++]" + bitWise + ";\n" +
"\t\t\t\tdataDst[indexDest++] = " + divide + ";\n" +
"\t\t\t}\n" +
"\n" +
"\t\t\tjEnd = j + width;\n" +
"\t\t\tcount = offset + offsetR;\n" +
"\t\t\tj += width - count;\n" +
"\t\t\tindexDest += width - count;\n" +
"\t\t\ttotal = 0;\n" +
"\t\t\tif (offsetR > 0) {\n" +
"\t\t\t\tfor (int indexSrc = j; indexSrc < jEnd; indexSrc++) {\n" +
"\t\t\t\t\ttotal += dataSrc[indexSrc]" + bitWise + ";\n" +
"\t\t\t\t}\n" +
"\t\t\t\tint count = jEnd - j - i;\n" +
"\t\t\t\tdataDst[indexDest++] = " + divide + ";\n" +
"\t\t\t}\n" +
"\t\t\twhile (--count > offset) {\n" +
"\t\t\t\ttotal -= dataSrc[j++]" + bitWise + ";\n" +
"\t\t\t\tdataDst[indexDest++] = " + divide + ";\n" +
"\t\t\t}\n";

Expand All @@ -125,7 +134,7 @@ public void printHorizontal() {
String bitWise = imageIn.getBitWise();

String declareHalf = imageIn.isInteger() ? "\t\tfinal " + sumType + " halfDivisor = divisor/2;\n" : "";
String divide = imageIn.isInteger() ? "(total+halfDivisor)/divisor" : "total/divisor";
String divide = imageIn.isInteger() ? "(total + halfDivisor)/divisor" : "total/divisor";

out.print("\tpublic static void horizontal( " + imageIn.getSingleBandName() + " input," + imageOut.getSingleBandName() + " output, int offset, int length ) {\n" +
"\t\tfinal " + sumType + " divisor = length;\n" +
Expand Down Expand Up @@ -181,18 +190,26 @@ public void printVerticalBorder() {
"\n" +
"\t\t// Image Top\n" +
"\t\tfor (int count = length - offset; count < length; count++) {\n" +
"\t\t\t{\n" +
"\t\t\t\tint indexIn = input.startIndex + x0;\n" +
"\t\t\tfinal int indexInRow = input.startIndex + x0;\n" +
"\t\t\tif (count == length - offset) {\n" +
"\t\t\t\tint indexIn = indexInRow;\n" +
"\n" +
"\t\t\t\tfor (int x = x0; x < x1; x++) {\n" +
"\t\t\t\t\ttotals[x - x0] = dataSrc[indexIn++]" + bitWise + ";\n" +
"\t\t\t\t}\n" +
"\t\t\t}\n" +
"\t\t\tfor (int y = 1; y < count; y++) {\n" +
"\t\t\t\tint indexIn = input.startIndex + x0 + y*input.stride;\n" +
"\n" +
"\t\t\t\tfor (int x = x0; x < x1; x++) {\n" +
"\t\t\t\t\ttotals[x - x0] += dataSrc[indexIn++]" + bitWise + ";\n" +
"\t\t\t\tfor (int y = 1; y < count; y++) {\n" +
"\t\t\t\t\tindexIn = indexInRow + y*input.stride;\n" +
"\n" +
"\t\t\t\t\tfor (int x = x0; x < x1; x++) {\n" +
"\t\t\t\t\t\ttotals[x - x0] += dataSrc[indexIn++]" + bitWise + ";\n" +
"\t\t\t\t\t}\n" +
"\t\t\t\t}\n" +
"\t\t\t} else {\n" +
"\t\t\t\tint indexIn0 = indexInRow + (count - 1)*input.stride;\n" +
"\t\t\t\tint end = indexIn0 + x1 - x0;\n" +
"\t\t\t\tfor (int i = indexIn0; i < end; i++) {\n" +
"\t\t\t\t\ttotals[i - indexIn0] += dataSrc[i]" + bitWise + ";\n" +
"\t\t\t\t}\n" +
"\t\t\t}\n" +
"\t\t\tint indexOut = output.startIndex + (count - (length - offset))*output.stride;\n" +
Expand All @@ -202,19 +219,27 @@ public void printVerticalBorder() {
"\t\t}\n" +
"\t\t// Image Bottom\n" +
"\t\tfor (int yStart = height - length + 1; yStart < height - offset; yStart++) {\n" +
"\t\t\t{\n" +
"\t\t\t\tint indexIn = input.startIndex + x0 + yStart*input.stride;\n" +
"\t\t\tfinal int indexInRow = input.startIndex + x0;\n" +
"\t\t\tif (yStart == height - length + 1) {\n" +
"\t\t\t\tint indexIn = indexInRow + yStart*input.stride;\n" +
"\n" +
"\t\t\t\tfor (int x = x0; x < x1; x++) {\n" +
"\t\t\t\t\ttotals[x - x0] = dataSrc[indexIn++]" + bitWise + ";\n" +
"\t\t\t\t}\n" +
"\t\t\t}\n" +
"\n" +
"\t\t\tfor (int y = yStart + 1; y < height; y++) {\n" +
"\t\t\t\tint indexIn = input.startIndex + x0 + y*input.stride;\n" +
"\t\t\t\tfor (int y = yStart + 1; y < height; y++) {\n" +
"\t\t\t\t\tindexIn = indexInRow + y*input.stride;\n" +
"\n" +
"\t\t\t\tfor (int x = x0; x < x1; x++) {\n" +
"\t\t\t\t\ttotals[x - x0] += dataSrc[indexIn++]" + bitWise + ";\n" +
"\t\t\t\t\tfor (int x = x0; x < x1; x++) {\n" +
"\t\t\t\t\t\ttotals[x - x0] += dataSrc[indexIn++]" + bitWise + ";\n" +
"\t\t\t\t\t}\n" +
"\t\t\t\t}\n" +
"\t\t\t} else {\n" +
"\t\t\t\tint indexIn0 = indexInRow + (yStart - 1)*input.stride;\n" +
"\t\t\t\tint indexIn1 = indexIn0 + x1 - x0;\n" +
"\n" +
"\t\t\t\tfor (int i = indexIn0; i < indexIn1; i++) {\n" +
"\t\t\t\t\ttotals[i - indexIn0] -= dataSrc[i]" + bitWise + ";\n" +
"\t\t\t\t}\n" +
"\t\t\t}\n" +
"\n" +
Expand All @@ -235,54 +260,60 @@ public void printVertical() {
String bitWise = imageIn.getBitWise();

String declareHalf = imageIn.isInteger() ? "\t\tfinal " + sumType + " halfDivisor = divisor/2;\n" : "";
String divide = imageIn.isInteger() ? "(total + halfDivisor)/divisor" : "total/divisor";
String divide = imageIn.isInteger() ? typeCast + "((total + halfDivisor)/divisor)" : "total/divisor";

String workType = ("DogArray_" + imageIn.getKernelType()).replace("S32", "I32");

out.print("\tpublic static void vertical( " + imageIn.getSingleBandName() + " input, " +
imageOut.getSingleBandName() + " output, int offset, int length, @Nullable GrowArray<" + workType + "> workspaces ) {\n" +
"\t\tworkspaces = BoofMiscOps.checkDeclare(workspaces, " + workType + "::new);\n" +
"\t\tfinal " + workType + " work = workspaces.grow(); //CONCURRENT_REMOVE_LINE\n" +
"\t\tfinal int backStep = length*input.stride;\n" +
"\t\tfinal int offsetEnd = length - offset - 1;\n" +
"\n" +
"\t\tfinal " + sumType + " divisor = length;\n" +
declareHalf +
"\n" +
"\t\t// To reduce cache misses it is processed along rows instead of going down columns, which is\n" +
"\t\t// more natural for a vertical convolution. For parallel processes this requires building\n" +
"\t\t// a book keeping array for each thread.\n");
"\t\tfinal int regionStepY = length*input.stride;\n");

String body = "";

body += "\t\t" + sumType + "[] totals = BoofMiscOps.checkDeclare(work, input.width, false);\n" +
"\t\tfor (int x = 0; x < input.width; x++) {\n" +
"\t\t\tint indexIn = input.startIndex + (y0 - offset)*input.stride + x;\n" +
"\t\t\tint indexOut = output.startIndex + output.stride*y0 + x;\n" +
body += "\t\t" + sumType + "[] totals = BoofMiscOps.checkDeclare(work, x1 - x0, false);\n" +
"\n" +
"\t\t\t" + sumType + " total = 0;\n" +
"\t\t\tint indexEnd = indexIn + input.stride*length;\n" +
"\t\t\tfor (; indexIn < indexEnd; indexIn += input.stride) {\n" +
"\t\t\t\ttotal += input.data[indexIn] " + bitWise + ";\n" +
"\t\t// Sum up along x-axis to avoid cache misses when reading from input image\n" +
"\t\t// Initialize recursion by summing up the first kernels along the x-axis\n" +
"\t\t{\n" +
"\t\t\tint indexIn = input.startIndex + x0;\n" +
"\n" +
"\t\t\tfor (int x = x0; x < x1; x++) {\n" +
"\t\t\t\ttotals[x - x0] = input.data[indexIn++]" + bitWise + ";\n" +
"\t\t\t}\n" +
"\t\t}\n" +
"\t\tfor (int y = 1; y < length; y++) {\n" +
"\t\t\tint indexIn = input.startIndex + y*input.stride + x0;\n" +
"\t\t\tint indexInEnd = indexIn + x1 - x0;\n" +
"\t\t\tfor (int i = indexIn; i < indexInEnd; i++) {\n" +
"\t\t\t\ttotals[i - indexIn] += input.data[i]" + bitWise + ";\n" +
"\t\t\t}\n" +
"\t\t\ttotals[x] = total;\n" +
"\t\t\toutput.data[indexOut] = " + typeCast + "(" + divide + ");\n" +
"\t\t}\n" +
"\n" +
"\t\t// change the order it is processed in to reduce cache misses\n" +
"\t\tfor (int y = y0 + 1; y < y1; y++) {\n" +
"\t\t\tint indexIn = input.startIndex + (y + offsetEnd)*input.stride;\n" +
"\t\t\tint indexOut = output.startIndex + y*output.stride;\n" +
"\n" +
"\t\t\tfor (int x = 0; x < input.width; x++, indexIn++, indexOut++) {\n" +
"\t\t\t\t" + sumType + " total = totals[x] - (input.data[indexIn - backStep]" + bitWise + ");\n" +
"\t\t\t\ttotals[x] = total += input.data[indexIn]" + bitWise + ";\n" +
"\t\tint indexOut = output.startIndex + output.stride*offset + x0;\n" +
"\t\tfor (int x = x0; x < x1; x++, indexOut++) {\n" +
"\t\t\tfinal " + sumType + " total = totals[x - x0];\n" +
"\t\t\toutput.data[indexOut] = " + divide + ";\n" +
"\t\t}\n" +
"\n" +
"\t\t\t\toutput.data[indexOut] = " + typeCast + "(" + divide + ");\n" +
"\t\t// For the reminder we only need to add and remove the first and last elements to update the solution\n" +
"\t\tfor (int y = 0; y < input.height - length; y++) {\n" +
"\t\t\tindexOut = output.startIndex + output.stride*(offset + y + 1) + x0;\n" +
"\t\t\tint indexIn = input.startIndex + y*input.stride + x0;\n" +
"\t\t\tint indexInEnd = indexIn + x1 - x0;\n" +
"\t\t\tfor (int i = indexIn; i < indexInEnd; i++, indexOut++) {\n" +
"\t\t\t\t" + sumType + " total = totals[i - indexIn] - (input.data[i]" + bitWise + ");\n" +
"\t\t\t\ttotal += input.data[i + regionStepY]" + bitWise + ";\n" +
"\t\t\t\toutput.data[indexOut] = " + divide + ";\n" +
"\t\t\t\ttotals[i - indexIn] = total;\n" +
"\t\t\t}\n" +
"\t\t}\n";

printParallelBlock("y0", "y1", "offset", "output.height - offsetEnd", "length", body);
printParallelBlock("x0", "x1", "0", "input.width", "20", body);

out.print("\t}\n\n");
}
Expand Down
Loading