diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoders/StringEncoders.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoders/StringEncoders.scala new file mode 100644 index 000000000..5890e6fb1 --- /dev/null +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoders/StringEncoders.scala @@ -0,0 +1,42 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package za.co.absa.cobrix.cobol.parser.encoders + +object StringEncoders { + /** + * Encodes a string as an EBCDIC byte array of exactly `length` bytes. Longer inputs are truncated, shorter ones are padded with 0x00. + * + * @param string An input string + * @param conversionTable A conversion table (indexed by the character code, 0 to 255) to use to convert from ASCII to EBCDIC + * @param length The length of the output (in bytes), must not be negative + * @return The EBCDIC encoded bytes; characters above 0xFF have no single-byte mapping and are encoded as 0x00 + */ + def encodeEbcdicString(string: String, conversionTable: Array[Byte], length: Int): Array[Byte] = { + require(length >= 0, s"Field length cannot be negative, got $length") + + var i = 0 + val buf = new Array[Byte](length) + + while (i < string.length && i < length) { + val charCode = string(i).toInt // Char.toByte would keep only the low 8 bits and alias e.g. '\u0141' to 'A' + buf(i) = if (charCode < 256) conversionTable(charCode) else 0x00.toByte + i = i + 1 + } + buf + } + +} diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala index e3e0c35d9..a3bcf3f26 100644 --- 
a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala @@ -32,6 +32,30 @@ abstract class CodePage extends Serializable { */ protected def ebcdicToAsciiMapping: Array[Char] + /** + * The default ASCII to EBCDIC conversion table (despite the method name), indexed by the ASCII character code. Characters that have no mapping are encoded as 0x00. Classes inherited from CodePage can override it to provide their own table. + */ + protected def asciiToAsciiMapping: Array[Byte] = { + Array[Byte]( + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x0D.toByte, 0x00.toByte, 0x00.toByte, 0x25.toByte, 0x00.toByte, 0x00.toByte, // 0 - 15 (NOTE(review): LF (10) maps to 0x0D and CR (13) to 0x25, which looks swapped relative to the usual EBCDIC CR/LF codes - confirm against the decoding table) + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 16 - 31 + 0x40.toByte, 0x5A.toByte, 0x7F.toByte, 0x7B.toByte, 0x5B.toByte, 0x6C.toByte, 0x50.toByte, 0x7D.toByte, 0x4D.toByte, 0x5D.toByte, 0x5C.toByte, 0x4E.toByte, 0x6B.toByte, 0x60.toByte, 0x4B.toByte, 0x61.toByte, // 32 - 47 + 0xF0.toByte, 0xF1.toByte, 0xF2.toByte, 0xF3.toByte, 0xF4.toByte, 0xF5.toByte, 0xF6.toByte, 0xF7.toByte, 0xF8.toByte, 0xF9.toByte, 0x7A.toByte, 0x5E.toByte, 0x4C.toByte, 0x7E.toByte, 0x6E.toByte, 0x6F.toByte, // 48 - 63 + 0x7C.toByte, 0xC1.toByte, 0xC2.toByte, 0xC3.toByte, 0xC4.toByte, 0xC5.toByte, 0xC6.toByte, 0xC7.toByte, 0xC8.toByte, 0xC9.toByte, 0xD1.toByte, 0xD2.toByte, 0xD3.toByte, 0xD4.toByte, 0xD5.toByte, 0xD6.toByte, // 64 - 79 + 0xD7.toByte, 0xD8.toByte, 0xD9.toByte, 0xE2.toByte, 0xE3.toByte, 0xE4.toByte, 0xE5.toByte, 0xE6.toByte, 0xE7.toByte, 0xE8.toByte, 0xE9.toByte, 0xBA.toByte, 0xE0.toByte, 0xBB.toByte, 0xB0.toByte, 0x6D.toByte, // 80 - 95 + 0x79.toByte, 0x81.toByte, 0x82.toByte, 0x83.toByte, 0x84.toByte, 0x85.toByte, 0x86.toByte, 0x87.toByte, 0x88.toByte, 0x89.toByte, 0x91.toByte, 0x92.toByte, 0x93.toByte, 0x94.toByte, 0x95.toByte, 
0x96.toByte, // 96 - 111 + 0x97.toByte, 0x98.toByte, 0x99.toByte, 0xA2.toByte, 0xA3.toByte, 0xA4.toByte, 0xA5.toByte, 0xA6.toByte, 0xA7.toByte, 0xA8.toByte, 0xA9.toByte, 0xC0.toByte, 0x6A.toByte, 0xD0.toByte, 0xA1.toByte, 0x00.toByte, // 112 - 127 + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 128 - 143 + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 144 - 159 + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 160 - 175 + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 176 - 191 + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 192 - 207 + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 208 - 223 + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 224 - 239 + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 
0x00.toByte, 0x00.toByte // 240 - 255 + ) + } + /** * Gets a mapping table for EBCDIC to ASCII conversions. Uses underlying protected abstract method to get * the actual table. Checks that the size of the mapping arrays is exactly 256 elements. @@ -51,6 +75,17 @@ abstract class CodePage extends Serializable { } table } + + /** + * Gets a mapping table for ASCII to EBCDIC conversions. Uses the underlying protected method to get + * the actual table. The table is returned as is, its size is not checked here. + * + * @return An ASCII to EBCDIC conversion table as an array of bytes + */ + @throws(classOf[IllegalArgumentException]) + final def getAsciiToEbcdicMapping: Array[Byte] = { + asciiToAsciiMapping + } } object CodePage { diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoders/StringEncodersSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoders/StringEncodersSpec.scala new file mode 100644 index 000000000..4fe47244e --- /dev/null +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoders/StringEncodersSpec.scala @@ -0,0 +1,66 @@ +package za.co.absa.cobrix.cobol.parser.encoders + +import org.scalatest.WordSpec +import za.co.absa.cobrix.cobol.parser.decoders.StringDecoders +import za.co.absa.cobrix.cobol.parser.decoders.StringDecoders.TrimNone +import za.co.absa.cobrix.cobol.parser.encoding.codepage.CodePageCommon + +class StringEncodersSpec extends WordSpec { + "encodeEbcdicString" should { + "be able to covert a basic ASCII string to EBCDIC" in { + val input = "0123456789 abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+-=<>[](){},./;:?!|$*~^`#@%_\\\'\"\r\n" + + val codePage = new 
CodePageCommon + + val ebcdic = StringEncoders.encodeEbcdicString(input, codePage.getAsciiToEbcdicMapping, input.length) + val ascii = StringDecoders.decodeEbcdicString(ebcdic, TrimNone, codePage.getEbcdicToAsciiMapping) + + assert(ascii == input) + } + + "be able to covert shorter strings" in { + val input = "0123456789" + val expected = "01234" + + val codePage = new CodePageCommon + + val ebcdic = StringEncoders.encodeEbcdicString(input, codePage.getAsciiToEbcdicMapping, 5) + val ascii = StringDecoders.decodeEbcdicString(ebcdic, TrimNone, codePage.getEbcdicToAsciiMapping) + + assert(ebcdic.length == 5) + assert(ascii == expected) + } + + "be able to covert longer strings" in { + val input = "012" + val expected = "F0F1F20000" + + val codePage = new CodePageCommon + + val ebcdic = StringEncoders.encodeEbcdicString(input, codePage.getAsciiToEbcdicMapping, 5) + val ebcdicHex = StringDecoders.decodeHex(ebcdic) + + assert(ebcdic.length == 5) + assert(expected == ebcdicHex) + } + + "be able to covert an empty string" in { + val codePage = new CodePageCommon + + val ebcdic = StringEncoders.encodeEbcdicString("", codePage.getAsciiToEbcdicMapping, 0) + + assert(ebcdic.length == 0) + } + + "throws an exception if a negative value was passed" in { + val codePage = new CodePageCommon + + val ex = intercept[IllegalArgumentException] { + StringEncoders.encodeEbcdicString("123", codePage.getAsciiToEbcdicMapping, -1) + } + + assert(ex.getMessage.contains("requirement failed: Field length cannot be negative, got -1")) + } + } + +}