Skip to content

#186 Add string encoders #770

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.cobrix.cobol.parser.encoders

object StringEncoders {
  /** The highest character code that can be used as an index into a 256-element conversion table. */
  private val MaxSingleByteChar = 0xFF

  /** The byte emitted for characters that cannot be looked up in the conversion table. */
  private val UnmappedByte: Byte = 0x00

  /**
    * An encoder from a string to a fixed-length EBCDIC byte array.
    *
    * Each character is used as an index (0 - 255) into `conversionTable`. Characters with a code
    * above 255 cannot be represented in a single-byte code page and are encoded as 0x00 instead of
    * being truncated to their low 8 bits (which would silently turn them into unrelated characters).
    *
    * If the string is longer than `length`, it is truncated. If it is shorter, the remaining bytes
    * of the output are left as 0x00.
    *
    * @param string          An input string
    * @param conversionTable A conversion table to use to convert from ASCII to EBCDIC, indexed by character code
    * @param length          The length of the output (in bytes)
    * @return A byte array of exactly `length` bytes containing the encoded string
    * @throws IllegalArgumentException if `length` is negative
    */
  def encodeEbcdicString(string: String, conversionTable: Array[Byte], length: Int): Array[Byte] = {
    require(length >= 0, s"Field length cannot be negative, got $length")

    // A new array is zero-filled, so positions past the end of the string stay 0x00.
    val buf = new Array[Byte](length)
    val charsToEncode = math.min(string.length, length)

    // A while loop is kept on purpose: this runs once per field and avoids boxing.
    var i = 0
    while (i < charsToEncode) {
      val charCode = string.charAt(i).toInt
      buf(i) = if (charCode <= MaxSingleByteChar) conversionTable(charCode) else UnmappedByte
      i += 1
    }
    buf
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,30 @@ abstract class CodePage extends Serializable {
*/
protected def ebcdicToAsciiMapping: Array[Char]

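/**
* The default ASCII to EBCDIC conversion table: 256 entries indexed by the character code (0 - 255).
* Most control characters and all codes 128 - 255 map to 0x00. Despite its name, this is the table
* returned by getAsciiToEbcdicMapping. A class inherited from CodePage can override it to provide
* its own conversion table.
*/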
protected def asciiToAsciiMapping: Array[Byte] = {
Comment on lines +35 to +38
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Fix the method name to match its purpose.

The method name asciiToAsciiMapping is incorrect. Based on the documentation comment and usage context, this should be asciiToEbcdicMapping since it provides ASCII-to-EBCDIC conversion.

-  /**
-    * Each class inherited from CodePage should provide its own conversion table
-    */
-  protected def asciiToAsciiMapping: Array[Byte] = {
+  /**
+    * Each class inherited from CodePage should provide its own conversion table
+    */
+  protected def asciiToEbcdicMapping: Array[Byte] = {
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
/**
* Each class inherited from CodePage should provide its own conversion table
*/
protected def asciiToAsciiMapping: Array[Byte] = {
/**
* Each class inherited from CodePage should provide its own conversion table
*/
protected def asciiToEbcdicMapping: Array[Byte] = {
🤖 Prompt for AI Agents
In
cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala
around lines 35 to 38, rename the method asciiToAsciiMapping to
asciiToEbcdicMapping to correctly reflect its purpose of providing
ASCII-to-EBCDIC conversion as indicated by the comment and usage context.

Array[Byte](
0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x0D.toByte, 0x00.toByte, 0x00.toByte, 0x25.toByte, 0x00.toByte, 0x00.toByte, // 0 - 15
0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 16 - 31
0x40.toByte, 0x5A.toByte, 0x7F.toByte, 0x7B.toByte, 0x5B.toByte, 0x6C.toByte, 0x50.toByte, 0x7D.toByte, 0x4D.toByte, 0x5D.toByte, 0x5C.toByte, 0x4E.toByte, 0x6B.toByte, 0x60.toByte, 0x4B.toByte, 0x61.toByte, // 32 - 47
0xF0.toByte, 0xF1.toByte, 0xF2.toByte, 0xF3.toByte, 0xF4.toByte, 0xF5.toByte, 0xF6.toByte, 0xF7.toByte, 0xF8.toByte, 0xF9.toByte, 0x7A.toByte, 0x5E.toByte, 0x4C.toByte, 0x7E.toByte, 0x6E.toByte, 0x6F.toByte, // 48 - 63
0x7C.toByte, 0xC1.toByte, 0xC2.toByte, 0xC3.toByte, 0xC4.toByte, 0xC5.toByte, 0xC6.toByte, 0xC7.toByte, 0xC8.toByte, 0xC9.toByte, 0xD1.toByte, 0xD2.toByte, 0xD3.toByte, 0xD4.toByte, 0xD5.toByte, 0xD6.toByte, // 64 - 79
0xD7.toByte, 0xD8.toByte, 0xD9.toByte, 0xE2.toByte, 0xE3.toByte, 0xE4.toByte, 0xE5.toByte, 0xE6.toByte, 0xE7.toByte, 0xE8.toByte, 0xE9.toByte, 0xBA.toByte, 0xE0.toByte, 0xBB.toByte, 0xB0.toByte, 0x6D.toByte, // 80 - 95
0x79.toByte, 0x81.toByte, 0x82.toByte, 0x83.toByte, 0x84.toByte, 0x85.toByte, 0x86.toByte, 0x87.toByte, 0x88.toByte, 0x89.toByte, 0x91.toByte, 0x92.toByte, 0x93.toByte, 0x94.toByte, 0x95.toByte, 0x96.toByte, // 96 - 111
0x97.toByte, 0x98.toByte, 0x99.toByte, 0xA2.toByte, 0xA3.toByte, 0xA4.toByte, 0xA5.toByte, 0xA6.toByte, 0xA7.toByte, 0xA8.toByte, 0xA9.toByte, 0xC0.toByte, 0x6A.toByte, 0xD0.toByte, 0xA1.toByte, 0x00.toByte, // 112 - 127
0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 128 - 143
0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 144 - 159
0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 160 - 175
0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 176 - 191
0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 192 - 207
0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 208 - 223
0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 224 - 239
0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte // 240 - 255
)
}

/**
* Gets a mapping table for EBCDIC to ASCII conversions. Uses underlying protected abstract method to get
* the actual table. Checks that the size of the mapping arrays is exactly 256 elements.
Expand All @@ -51,6 +75,17 @@ abstract class CodePage extends Serializable {
}
table
}

/**
  * Gets a mapping table for ASCII to EBCDIC conversions. Uses the underlying protected method to get
  * the actual table. Checks that the size of the mapping array is exactly 256 elements, so that any
  * character code in the range 0 - 255 can be used as an index.
  *
  * @return An ASCII to EBCDIC conversion table as an array of 256 bytes
  * @throws IllegalArgumentException if the underlying table does not have exactly 256 elements
  */
@throws(classOf[IllegalArgumentException])
final def getAsciiToEbcdicMapping: Array[Byte] = {
  val ConversionTableElements = 256
  val table = asciiToAsciiMapping
  if (table.length != ConversionTableElements) {
    throw new IllegalArgumentException(
      s"An ASCII to EBCDIC conversion table should have exactly $ConversionTableElements elements. It has ${table.length} elements.")
  }
  table
}
Comment on lines +79 to +88
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Add array size validation for consistency.

The public accessor should validate the conversion table size like the existing getEbcdicToAsciiMapping method does. This ensures consistency and prevents runtime issues with malformed conversion tables.

  @throws(classOf[IllegalArgumentException])
  final def getAsciiToEbcdicMapping: Array[Byte] = {
-    asciiToAsciiMapping
+    val ConversionTableElements = 256
+    val table = asciiToEbcdicMapping
+    if (table.length != ConversionTableElements) {
+      throw new IllegalArgumentException(
+        s"An ASCII to EBCDIC conversion table should have exactly $ConversionTableElements elements. It has ${table.length} elements.")
+    }
+    table
  }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
/**
* Gets a mapping table for ASCII to EBCDIC conversions. Uses underlying protected abstract method to get
* the actual table.
*
* @return An ASCII to EBCDIC conversion table as an array of chars
*/
@throws(classOf[IllegalArgumentException])
final def getAsciiToEbcdicMapping: Array[Byte] = {
asciiToAsciiMapping
}
@throws(classOf[IllegalArgumentException])
final def getAsciiToEbcdicMapping: Array[Byte] = {
val ConversionTableElements = 256
val table = asciiToEbcdicMapping
if (table.length != ConversionTableElements) {
throw new IllegalArgumentException(
s"An ASCII to EBCDIC conversion table should have exactly $ConversionTableElements elements. It has ${table.length} elements.")
}
table
}
🤖 Prompt for AI Agents
In
cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala
around lines 79 to 88, the getAsciiToEbcdicMapping method returns the
asciiToAsciiMapping array without validating its size. To fix this, add a check
to verify that the returned array has the expected length, similar to the
validation done in getEbcdicToAsciiMapping. If the size is incorrect, throw an
IllegalArgumentException to prevent runtime issues caused by malformed
conversion tables.

}

object CodePage {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package za.co.absa.cobrix.cobol.parser.encoders

import org.scalatest.WordSpec
import za.co.absa.cobrix.cobol.parser.decoders.StringDecoders
import za.co.absa.cobrix.cobol.parser.decoders.StringDecoders.TrimNone
import za.co.absa.cobrix.cobol.parser.encoding.codepage.CodePageCommon

class StringEncodersSpec extends WordSpec {
  /** Encodes `text` into a field of `fieldLength` bytes using the ASCII to EBCDIC table of `page`. */
  private def encode(text: String, fieldLength: Int, page: CodePageCommon): Array[Byte] =
    StringEncoders.encodeEbcdicString(text, page.getAsciiToEbcdicMapping, fieldLength)

  /** Decodes EBCDIC `bytes` back to text, without trimming, using the EBCDIC to ASCII table of `page`. */
  private def decode(bytes: Array[Byte], page: CodePageCommon) =
    StringDecoders.decodeEbcdicString(bytes, TrimNone, page.getEbcdicToAsciiMapping)

  "encodeEbcdicString" should {
    // Round trip: encoding and then decoding must give back the original text.
    "be able to covert a basic ASCII string to EBCDIC" in {
      val page = new CodePageCommon
      val original = "0123456789 abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+-=<>[](){},./;:?!|$*~^`#@%_\\\'\"\r\n"

      val encoded = encode(original, original.length, page)
      val roundTripped = decode(encoded, page)

      assert(roundTripped == original)
    }

    // The field is shorter than the text, so the text is truncated.
    "be able to covert shorter strings" in {
      val page = new CodePageCommon
      val fieldLength = 5

      val encoded = encode("0123456789", fieldLength, page)
      val roundTripped = decode(encoded, page)

      assert(encoded.length == 5)
      assert(roundTripped == "01234")
    }

    // The field is longer than the text, so the tail is left as zero bytes.
    "be able to covert longer strings" in {
      val page = new CodePageCommon
      val expectedHex = "F0F1F20000"

      val encoded = encode("012", 5, page)
      val actualHex = StringDecoders.decodeHex(encoded)

      assert(encoded.length == 5)
      assert(expectedHex == actualHex)
    }

    "be able to covert an empty string" in {
      val encoded = encode("", 0, new CodePageCommon)

      assert(encoded.length == 0)
    }

    "throws an exception if a negative value was passed" in {
      val page = new CodePageCommon

      val thrown = intercept[IllegalArgumentException] {
        encode("123", -1, page)
      }

      assert(thrown.getMessage.contains("requirement failed: Field length cannot be negative, got -1"))
    }
  }

}