Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 140 additions & 0 deletions scripts/bmc_techsupport.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#!/usr/bin/env python3

"""
bmc_techsupport script.
This script is invoked by the generate_dump script for BMC techsupport fetching,
but also can be invoked manually to trigger and collect BMC debug log dump.

The usage of this script is divided into two parts:
1. Triggering BMC debug log dump Redfish task
* In this case the script triggers a POST request to BMC to start collecting debug log dump.
* The script prints the new task-id to the console output; the task-id is needed later
to collect the debug log dump once the task has finished.
* This step is non-blocking, task-id is returned immediately.
* It is invoked with the parameter '--mode trigger'
E.g.: /usr/local/bin/bmc_techsupport.py --mode trigger

2. Collecting BMC debug log dump
* In this step we will wait for the task-id to finish if it has not finished.
* Blocking action until we get the file or encounter an ERROR or Timeout.
* It is invoked with the parameter '--mode collect --task <task-id> --path <path>'
E.g.: /usr/local/bin/bmc_techsupport.py --mode collect --path <path> --task <task-id>

Basically, in the generate_dump script we will call the first method
at the beginning of its process and the second method towards the end of the process.
"""


import argparse
import os
import sonic_platform
import time
from sonic_py_common.syslogger import SysLogger


# Timeout, in seconds, passed as 'timeout' to get_bmc_debug_log_dump() in collect mode
TIMEOUT_FOR_GET_BMC_DEBUG_LOG_DUMP_IN_SECONDS = 60
# Identifier handed to SysLogger for the log entries written by this script
SYSLOG_IDENTIFIER = "bmc_techsupport"
# Module-wide logger shared by the extractor class and main()
log = SysLogger(SYSLOG_IDENTIFIER)


class BMCDebugDumpExtractor:
'''
Class to trigger and extract BMC debug log dump
'''

INVALID_TASK_ID = '-1'
TRIGGER_MODE = 'trigger'
COLLECT_MODE = 'collect'

def __init__(self):
    # Walk platform -> chassis -> BMC in one expression; the result is stored
    # as-is (main() checks it against None before using the extractor).
    self.bmc = sonic_platform.platform.Platform().get_chassis().get_bmc()

def trigger_debug_dump(self):
    '''
    Trigger BMC debug log dump and print the running task id to the console output.

    The task id returned by the BMC is printed only when the trigger call
    reports success (ret == 0). On any failure the error is logged and
    INVALID_TASK_ID is printed instead, so the caller that captures stdout
    always receives either a usable task id or the invalid marker.
    '''
    # Start from the invalid marker; it is replaced only after a confirmed success.
    task_id = BMCDebugDumpExtractor.INVALID_TASK_ID
    try:
        log.log_info("Triggering BMC debug log dump Redfish task")
        # Unpack into a separate name so a failed call cannot overwrite the
        # invalid marker with whatever value accompanied the error.
        (ret, (new_task_id, err_msg)) = self.bmc.trigger_bmc_debug_log_dump()
        if ret != 0:
            raise Exception(err_msg)
        task_id = new_task_id
        log.log_info(f'Successfully triggered BMC debug log dump - Task-id: {task_id}')
    except Exception as e:
        log.log_error(f'Failed to trigger BMC debug log dump - {str(e)}')
    finally:
        # generate_dump script captures the task id from the console output via $(...) syntax
        print(f'{task_id}')

def extract_debug_dump_file(self, task_id, filepath):
'''
Extract BMC debug log dump file for the given task id and save it to the given filepath
'''
try:
if task_id is None or task_id == BMCDebugDumpExtractor.INVALID_TASK_ID:
raise Exception('Invalid Task-ID')
log_dump_dir = os.path.dirname(filepath)
log_dump_filename = os.path.basename(filepath)
if not log_dump_dir or not log_dump_filename:
raise Exception(f'Invalid given filepath: {filepath}')
if not log_dump_filename.endswith('.tar.xz'):
raise Exception(f'Invalid given filepath extension, should be .tar.xz: {log_dump_filename}')

start_time = time.time()
log.log_info("Collecting BMC debug log dump")
ret, err_msg = self.bmc.get_bmc_debug_log_dump(
task_id=task_id,
filename=log_dump_filename,
path=log_dump_dir,
timeout=TIMEOUT_FOR_GET_BMC_DEBUG_LOG_DUMP_IN_SECONDS
)
end_time = time.time()
duration = end_time - start_time
if ret != 0:
timeout_msg = (
f'BMC debug log dump does not finish within '
f'{TIMEOUT_FOR_GET_BMC_DEBUG_LOG_DUMP_IN_SECONDS} seconds: {err_msg}'
)
log.log_error(timeout_msg)
Comment on lines +96 to +100
Copy link

Copilot AI Nov 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The timeout_msg variable is created but never used. It should either be logged or removed. Consider using timeout_msg in the log.log_error() call instead of just err_msg, or remove the variable if not needed.

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

timeout_msg is logged in the log.log_error() call.

raise Exception(err_msg)
log.log_info(f'Finished successfully collecting BMC debug log dump. Duration: {duration} seconds')
except Exception as e:
log.log_error(f'Failed to collect BMC debug log dump - {str(e)}')


def main(mode, task_id, filepath):
    """
    Run the script in the requested mode.

    mode     -- BMCDebugDumpExtractor.TRIGGER_MODE or BMCDebugDumpExtractor.COLLECT_MODE
    task_id  -- task id to collect from (collect mode only)
    filepath -- destination path of the dump file (collect mode only)

    If the extractor cannot be created, or no BMC instance is available, the
    failure is logged; in trigger mode the invalid task id is additionally
    printed so the caller capturing stdout still receives a value.
    """
    trigger_requested = mode == BMCDebugDumpExtractor.TRIGGER_MODE

    try:
        extractor = BMCDebugDumpExtractor()
        if extractor.bmc is None:
            raise Exception('BMC instance is not available')
    except Exception as e:
        log.log_error(f'Failed to initialize BMCDebugDumpExtractor: {str(e)}')
        if trigger_requested:
            print(f'{BMCDebugDumpExtractor.INVALID_TASK_ID}')
        return

    if trigger_requested:
        extractor.trigger_debug_dump()
        return

    # Any mode other than collect is silently ignored from here on.
    if mode != BMCDebugDumpExtractor.COLLECT_MODE:
        return

    if not (task_id and filepath):
        log.log_error("Both --task and --path arguments are required for 'collect' mode")
        return
    extractor.extract_debug_dump_file(task_id, filepath)


Comment on lines +123 to +125
Copy link

Copilot AI Nov 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When mode is 'collect', the required arguments task_id and filepath are not validated. If either is None, the script will proceed and fail later with unclear errors. Add validation to ensure these arguments are provided when mode is 'collect':

if mode == BMCDebugDumpExtractor.COLLECT_MODE:
    if not task_id or not filepath:
        log.log_error("Both --task and --path arguments are required for 'collect' mode")
        return
Suggested change
extractor.extract_debug_dump_file(task_id, filepath)
if not task_id or not filepath:
log.log_error("Both --task and --path arguments are required for 'collect' mode")
return
extractor.extract_debug_dump_file(task_id, filepath)

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    cli = argparse.ArgumentParser(description="BMC tech-support generator script.")
    cli.add_argument(
        '-m', '--mode',
        choices=['collect', 'trigger'],
        required=True,
        help="Mode of operation: 'collect' for collecting debug dump or 'trigger' for triggering debug dump task."
    )
    # --path and --task are optional for the parser; main() enforces them for collect mode.
    cli.add_argument('-p', '--path', help="Path to save the BMC debug log dump file.")
    cli.add_argument('-t', '--task', help="Task-ID to monitor and collect the debug dump from.")
    options = cli.parse_args()
    main(options.mode, options.task, options.path)
82 changes: 82 additions & 0 deletions scripts/generate_dump
Original file line number Diff line number Diff line change
Expand Up @@ -1950,6 +1950,77 @@ save_log_files() {
enable_logrotate
}

###############################################################################
# Check BMC presence
# Arguments:
# None
# Returns:
# 0 if BMC is supported, 1 otherwise
###############################################################################
is_bmc_supported() {
    # Declare and assign separately: 'local var=$(cmd)' would hide a failing
    # python3 call behind the exit status of 'local' itself.
    local platform
    platform=$(python3 -c "from sonic_py_common import device_info; print(device_info.get_platform())") || return 1

    # Without a platform name there is no device directory to look into
    if [ -z "$platform" ]; then
        return 1
    fi

    # BMC support is indicated by a bmc.json file in the platform's device directory
    if [ -f "/usr/share/sonic/device/$platform/bmc.json" ]; then
        return 0
    fi
    return 1
}

###############################################################################
# Trigger BMC debug log dump task
# Arguments:
# None
# Returns:
# None
###############################################################################
trigger_bmc_debug_log_dump() {
trap 'handle_error $? $LINENO' ERR
if ! is_bmc_supported; then
echo "INFO: BMC is not found on this platform. Skipping..."
Copy link

Copilot AI Nov 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When BMC is not supported, the function echoes an INFO message and returns, which causes that message to be captured as the task_id value at line 2255. This leads to incorrect behavior:

  1. At line 2256, the check for "-1" fails (since task_id is "INFO: BMC is not found...")
  2. At line 2474, the check != "-1" succeeds, causing collect_bmc_files to be called with the INFO message as the task_id

The function should return "-1" explicitly without echoing the INFO message, or restructure to avoid this issue:

if ! is_bmc_supported; then
    echo "-1"
    return
fi

And move the INFO message to the caller (line 2254-2258) to check for empty/invalid returns.

Suggested change
echo "INFO: BMC is not found on this platform. Skipping..."
echo "-1"

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

collect_bmc_files will be called with the INFO message as the task_id.
However, at the top of collect_bmc_files, there is a check for 'is_bmc_supported'. Therefore, we will not use this task_id.

return
fi
# Trigger BMC redfish API to start BMC debug log dump task
local task_id=$(python3 /usr/local/bin/bmc_techsupport.py -m trigger)
echo "$task_id"
}

###############################################################################
# Save BMC debug log dump files
# Globals:
# MKDIR, CP, TARDIR, TECHSUPPORT_TIME_INFO
# Arguments:
# $1 - BMC debug log dump task ID
# Returns:
# None
###############################################################################
collect_bmc_files() {
    # The bmc directory in the dump tree is created before the support check
    $MKDIR $V -p $TARDIR/bmc
    trap 'handle_error $? $LINENO' ERR
    start_t=$(date +%s%3N)

    # Nothing to collect on platforms without a BMC
    is_bmc_supported || return 0

    local task_id=$1
    local dump_tarball="/tmp/bmc_debug_log_dump.tar.xz"

    # Drop a stale tarball left over from a previous run
    if [ -f "$dump_tarball" ]; then
        rm -f "$dump_tarball"
    fi

    # Ask the BMC (via Redfish) to write the debug log dump to "/tmp/bmc_debug_log_dump.tar.xz"
    python3 /usr/local/bin/bmc_techsupport.py -m collect -p "$dump_tarball" -t "$task_id"

    # The script reports failures through syslog only, so success is judged by the file itself
    if [ ! -f "$dump_tarball" ]; then
        echo "ERROR: File $dump_tarball does not exist."
    else
        $CP $V -rf "$dump_tarball" $TARDIR/bmc
    fi

    # Remove the temporary tarball once it has been copied
    if [ -f "$dump_tarball" ]; then
        rm -f "$dump_tarball"
    fi

    end_t=$(date +%s%3N)
    echo "[ collect_bmc_files ] : $((end_t - start_t)) msec" >> $TECHSUPPORT_TIME_INFO
}

###############################################################################
# Save warmboot files
# Globals:
Expand Down Expand Up @@ -2176,6 +2247,12 @@ main() {
echo $BASE > $TECHSUPPORT_TIME_INFO
start_t=$(date +%s%3N)

# Trigger BMC debug log dump task - Must be the first task to run
bmc_debug_log_dump_task_id=$(trigger_bmc_debug_log_dump)
if [ "$bmc_debug_log_dump_task_id" == "-1" ]; then
echo "INFO: Fail to trigger BMC debug log dump. Skipping..."
fi

# Capture /proc state early
save_proc /proc/buddyinfo /proc/cmdline /proc/consoles \
/proc/cpuinfo /proc/devices /proc/diskstats /proc/dma \
Expand Down Expand Up @@ -2389,6 +2466,11 @@ main() {
save_log_files &
save_crash_files &
save_warmboot_files &

if [ "$bmc_debug_log_dump_task_id" != "-1" ]; then
collect_bmc_files $bmc_debug_log_dump_task_id &
fi

wait

save_to_tar
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@
'scripts/memory_threshold_check.py',
'scripts/memory_threshold_check_handler.py',
'scripts/techsupport_cleanup.py',
'scripts/bmc_techsupport.py',
'scripts/storm_control.py',
'scripts/verify_image_sign.sh',
'scripts/verify_image_sign_common.sh',
Expand Down
104 changes: 104 additions & 0 deletions show/platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,110 @@ def summary(json):
click.echo("Switch Type: {}".format(switch_type))


# 'bmc' subcommand ("show platform bmc")
@platform.group()
def bmc():
    """Show BMC information"""
    # Pure command group: the subcommands registered on it do the actual work.


# 'summary' subcommand ("show platform bmc summary")
@bmc.command(name='summary')
@click.option('--json', is_flag=True, help="Output in JSON format")
def bmc_summary(json):
"""Show BMC summary information"""
try:
import sonic_platform
chassis = sonic_platform.platform.Platform().get_chassis()
bmc = chassis.get_bmc()

if bmc is None:
click.echo("BMC is not available on this platform")
return

eeprom_info = bmc.get_eeprom()
if not eeprom_info:
click.echo("Failed to retrieve BMC EEPROM information")
return

Comment on lines +85 to +98
Copy link

Copilot AI Nov 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The bmc_summary and eeprom functions have significant code duplication in their initialization and error handling logic (getting the chassis, BMC, and EEPROM info). Consider extracting this common logic into a helper function to improve maintainability:

def _get_bmc_eeprom_info():
    """Helper function to get BMC and EEPROM information."""
    import sonic_platform
    chassis = sonic_platform.platform.Platform().get_chassis()
    bmc = chassis.get_bmc()
    
    if bmc is None:
        return None, None, "BMC is not available on this platform"
    
    eeprom_info = bmc.get_eeprom()
    if not eeprom_info:
        return None, None, "Failed to retrieve BMC EEPROM information"
    
    return bmc, eeprom_info, None

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the helper function cannot have the @click.option, we should indicate the kind of error via the return values, and then check and log accordingly in the original functions. So there is no significant improvement in code reduction.

# Extract the required fields
manufacturer = eeprom_info.get('Manufacturer', 'N/A')
model = eeprom_info.get('Model', 'N/A')
part_number = eeprom_info.get('PartNumber', 'N/A')
power_state = eeprom_info.get('PowerState', 'N/A')
serial_number = eeprom_info.get('SerialNumber', 'N/A')
bmc_version = bmc.get_version()

if json:
bmc_summary = {
'Manufacturer': manufacturer,
'Model': model,
'PartNumber': part_number,
'SerialNumber': serial_number,
'PowerState': power_state,
'FirmwareVersion': bmc_version
}
click.echo(clicommon.json_dump(bmc_summary))
else:
click.echo(f"Manufacturer: {manufacturer}")
click.echo(f"Model: {model}")
click.echo(f"PartNumber: {part_number}")
click.echo(f"SerialNumber: {serial_number}")
click.echo(f"PowerState: {power_state}")
click.echo(f"FirmwareVersion: {bmc_version}")

except Exception as e:
click.echo(f"Error retrieving BMC information: {str(e)}")


# 'eeprom' subcommand ("show platform bmc eeprom")
@bmc.command()
@click.option('--json', is_flag=True, help="Output in JSON format")
def eeprom(json):
    """Show BMC EEPROM information"""
    try:
        import sonic_platform
        bmc_device = sonic_platform.platform.Platform().get_chassis().get_bmc()
        if bmc_device is None:
            click.echo("BMC is not available on this platform")
            return

        raw_eeprom = bmc_device.get_eeprom()
        if not raw_eeprom:
            click.echo("Failed to retrieve BMC EEPROM information")
            return

        # Fields to report, in display order; missing entries fall back to 'N/A'
        field_names = ('Manufacturer', 'Model', 'PartNumber', 'PowerState', 'SerialNumber')
        report = {name: raw_eeprom.get(name, 'N/A') for name in field_names}

        if json:
            click.echo(clicommon.json_dump(report))
            return

        # Plain-text output: one "<field>: <value>" line per field
        for name, value in report.items():
            click.echo(f"{name}: {value}")

    except Exception as e:
        click.echo(f"Error retrieving BMC EEPROM information: {str(e)}")


# 'syseeprom' subcommand ("show platform syseeprom")
@platform.command()
@click.option('--verbose', is_flag=True, help="Enable verbose output")
Expand Down
Loading
Loading