From d795f7cd55ac3f68a02709cec625e9c1eb0119a0 Mon Sep 17 00:00:00 2001 From: clundro Date: Sat, 25 Mar 2023 02:22:29 +0800 Subject: [PATCH 1/4] [feat] add format_header 1. still incomplete file. Signed-off-by: clundro --- build_support/format_header.py | 222 +++++++++++++++++++++++++++++++++ build_support/helpers.py | 174 ++++++++++++++++++++++++++ 2 files changed, 396 insertions(+) create mode 100644 build_support/format_header.py create mode 100644 build_support/helpers.py diff --git a/build_support/format_header.py b/build_support/format_header.py new file mode 100644 index 000000000..c46071cb3 --- /dev/null +++ b/build_support/format_header.py @@ -0,0 +1,222 @@ +#!/usr/bin/env python3 +# encoding: utf-8 +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# Modified from the Apache Arrow project for the Terrier project. + +"""Format the ill-formatted code.""" +# ============================================== +# GOAL : Format code, Update headers +# ============================================== + +# ===----------------------------------------------------------------------===// +# +# CMU-DB Project (15-445/645) +# ***DO NO SHARE PUBLICLY*** +# +# Identification: src/include/page/b_plus_tree_page.h +# +# Copyright (c) 2023, Carnegie Mellon University Database Group +# +# ===----------------------------------------------------------------------===// + + +# ref: https://github.com/cmu-db/bustub/blob/master/script/formatting/formatter.py + + +import argparse +import logging +import os +import re +import sys +import datetime +import subprocess +from functools import reduce +from helpers import CLANG_FORMAT, BUSTUB_DIR, CLANG_FORMAT_FILE, LOG,\ + clang_format, hunks_from_staged_files, hunks_from_last_commits + + +BUSTUB_SRC_DIR = os.path.join(BUSTUB_DIR, "src") +BUSTUB_TESTS_DIR = os.path.join(BUSTUB_DIR, "test") + +# DEFAULT DIRS +DEFAULT_DIRS = [] +DEFAULT_DIRS.append(BUSTUB_SRC_DIR) +DEFAULT_DIRS.append(BUSTUB_TESTS_DIR) + +# header framework, dynamic information will be added inside function +header_comment_line_1 = "//===----------------------------------------------------------------------===//\n" +header_comment_line_1 += "//\n" +header_comment_line_1 += "// CMU-DB Project (15-445/645)\n" +header_comment_line_2 = "// ***DO NO SHARE PUBLICLY***\n" +header_comment_line_3 = "// " +header_comment_line_4 = "//\n" +header_comment_line_5 = "// Identification: " +header_comment_line_6 = "//\n" +header_comment_line_7 = "// Copyright (c) %d, Carnegie Mellon University Database Group\n" % datetime.datetime.now().year +header_comment_line_8 = "//\n" +header_comment_line_9 = "//===----------------------------------------------------------------------===//\n\n" + +header_comment_1 = header_comment_line_1 + header_comment_line_2 +header_comment_3 = header_comment_line_4 +header_comment_5 = header_comment_line_6 + header_comment_line_7 \ + + header_comment_line_8 + header_comment_line_9 + +HEADER_REGEX = re.compile( + r"((\/\/===-*===\/\/\n(\/\/.*\n)*\/\/===-*===\/\/[\n]*)\n\n)*") + + +# ============================================== +# UTILITY FUNCTION DEFINITIONS +# ============================================== + + +def format_file(file_path, file_hunks, update_header, clang_format_code): + """Formats the file passed as argument.""" + file_name = os.path.basename(file_path) + abs_path = os.path.abspath(file_path) + rel_path_from_bustub_dir = os.path.relpath(abs_path, BUSTUB_DIR) + + with open(file_path, "r+") as file: + file_data = file.read() + + if update_header: + # strip old header if it exists + header_match = HEADER_REGEX.match(file_data) + if not header_match is None: + LOG.info("Strip header from %s", file_name) + header_comment = header_match.group() + LOG.debug("Header comment : %s", header_comment) + file_data = file_data.replace(header_comment, "") + + # add new header + LOG.info("Add header to %s", file_name) + header_comment_2 = header_comment_line_3 + file_name + "\n" + header_comment_4 = header_comment_line_5\ + + rel_path_from_bustub_dir + "\n" + header_comment = header_comment_1 + header_comment_2 \ + + header_comment_3 + header_comment_4 \ + + header_comment_5 + # print header_comment + + file_data = header_comment + file_data + + file.seek(0, 0) + file.truncate() + file.write(file_data) + + elif clang_format_code: + clang_format(file_path, file_hunks) + + # END WITH +# END FORMAT__FILE(FILE_NAME) + + +def format_dir(dir_path, update_header, clang_format_code): + """Formats all the files in the dir passed as argument.""" + for subdir, _, files in os.walk(dir_path): # _ is for directories. + for file in files: + # print os.path.join(subdir, file) + file_path = subdir + os.path.sep + file + + if file_path.endswith(".h") or file_path.endswith(".cpp"): + format_file(file_path, None, update_header, clang_format_code) + # END IF + # END FOR [file] + # END FOR [os.walk] +# END ADD_HEADERS_DIR(DIR_PATH) + + +# ============================================== +# Main Function +# ============================================== + +if __name__ == '__main__': + + PARSER = argparse.ArgumentParser( + description='Update headers and/or format source code' + ) + + PARSER.add_argument( + "-u", "--update-header", + help='Action: Update existing headers or add new ones', + action='store_true' + ) + PARSER.add_argument( + "-c", "--clang-format-code", + help='Action: Apply clang-format to source code', + action='store_true' + ) + PARSER.add_argument( + "-f", "--staged-files", + help='Action: Apply the selected action(s) to all staged files (git). ' + + '(clang-format will only touch the staged lines)', + action='store_true' + ) + PARSER.add_argument( + "-n", "--number-commits", + help='Action: Apply the selected action(s) to all changes of the last ' + + ' commits (clang-format will only touch the changed lines)', + type=int, default=0 + ) + PARSER.add_argument( + 'paths', metavar='PATH', type=str, nargs='*', + help='Files or directories to (recursively) apply the actions to' + ) + + ARGS = PARSER.parse_args() + + # TARGETS is a list of files with an optional list of hunks, represented as + # pair (start, end) of line numbers, 1 based. + # element of TARGETS: (filename, None) or (filename, [(start,end)]) + + if ARGS.staged_files: + TARGETS = hunks_from_staged_files() + + if not TARGETS: + LOG.error( + "no staged files or not calling from a repository -- exiting" + ) + sys.exit("no staged files or not calling from a repository") + + elif ARGS.number_commits > 0: + TARGETS = hunks_from_last_commits(ARGS.number_commits) + + if not TARGETS: + LOG.error( + "no changes could be extracted for formatting -- exiting" + ) + sys.exit("no changes could be extracted for formatting") + + elif not ARGS.paths: + LOG.error("no files or directories given -- exiting") + sys.exit("no files or directories given") + + else: + TARGETS = [(f, None) for f in ARGS.paths] + + for f, hunks in TARGETS: + if os.path.isfile(f): + LOG.info("Scanning file: %s", f) + format_file(f, hunks, ARGS.update_header, ARGS.clang_format_code) + elif os.path.isdir(f): + LOG.info("Scanning directory %s", f) + format_dir(f, ARGS.update_header, ARGS.clang_format_code) + # FOR +# IF diff --git a/build_support/helpers.py b/build_support/helpers.py new file mode 100644 index 000000000..a219fdfac --- /dev/null +++ b/build_support/helpers.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +"""Common helper functions to be used in different Python scripts.""" +import difflib +import distutils.spawn +import logging +import os +import subprocess +import re + +from functools import reduce + +CODE_SOURCE_DIR = os.path.abspath(os.path.dirname(__file__)) +BUSTUB_DIR = CODE_SOURCE_DIR.replace('/build_support', '') +CLANG_FORMAT_FILE = os.path.join(BUSTUB_DIR, ".clang-format") + +FORMATTING_FILE_WHITELIST = [ + # Fill me +] + +DIFF_FILE_PATT = re.compile(r'^\+\+\+ b\/(.*)') +DIFF_HUNK_PATT = re.compile(r'^@@ \-\d+(,\d+)? \+(\d+)(,)?(\d+)? @@.*') + +# ============================================== +# LOGGING CONFIGURATION +# ============================================== + +LOG = logging.getLogger(__name__) +LOG_HANDLER = logging.StreamHandler() +LOG_FORMATTER = logging.Formatter( + fmt='%(asctime)s [%(funcName)s:%(lineno)03d] %(levelname)-5s: %(message)s', + datefmt='%m-%d-%Y %H:%M:%S' +) +LOG_HANDLER.setFormatter(LOG_FORMATTER) +LOG.addHandler(LOG_HANDLER) +LOG.setLevel(logging.INFO) + + +def find_clangformat(): + """Finds appropriate clang-format executable.""" + # check for possible clang-format versions + path = "" + for exe in ["clang-format", "clang-format-13", "clang-format-14"]: + path = distutils.spawn.find_executable(exe) + if not path is None: + break + return path + + +CLANG_FORMAT = find_clangformat() +CLANG_COMMAND_PREFIX = [CLANG_FORMAT, "-style=file"] + + +def clang_check(file_path, hunks=None): + """Checks and reports bad code formatting.""" + + assert not file_path is None and not file_path == "" + + rel_path_from_peloton_dir = os.path.relpath(file_path, BUSTUB_DIR) + + if rel_path_from_peloton_dir in FORMATTING_FILE_WHITELIST: + return True + + file_status = True + + # Run clang-format on the file and get output (not inline!) + formatted_src = clang_format(file_path, None, inline=False) + + # For Python 3, the above command gives a list of binary sequences, each + # of which has to be converted to string for diff to operate correctly. + # Otherwise, strings would be compared with binary sequences and there + # will always be a big difference. + formatted_src = [line.decode('utf-8') for line in formatted_src] + # Load source file + with open(file_path, "r") as file: + src = file.readlines() + + # Do the diff + difference = difflib.Differ() + diff = difference.compare(src, formatted_src) + line_num = 0 + for line in diff: + code = line[:2] + if code in (" ", "- "): + line_num += 1 + if code == '- ': + if file_status: + LOG.info("Invalid formatting in file : " + file_path) + LOG.info("Line %d: %s", line_num, line[2:].strip()) + file_status = False + + return file_status + + +def clang_format(file_path, hunks=None, inline=True): + """Formats the file at file_path. + 'hunks' can be a list of pairs with (start,end) line numbers, 1 based. + """ + + assert not file_path is None and not file_path == "" + + if CLANG_FORMAT is None: + LOG.error("clang-format seems not installed") + exit() + + formatting_command = CLANG_COMMAND_PREFIX + [file_path] + + if inline: + formatting_command.append("-i") + + if not hunks is None: + for start, end in hunks: + if start > 0 and end > 0: + formatting_command.append("-lines={}:{}".format(start, end)) + + LOG.info(' '.join(formatting_command)) + output = subprocess.check_output(formatting_command).splitlines(True) + return output + + +def hunks_from_last_commits(n): + """ Extract hunks of the last n commits. """ + + assert n > 0 + + diff_output = subprocess.check_output(["git", "diff", "HEAD~"+str(n), "--diff-filter=d", "--unified=0"] + ).decode("utf-8").splitlines() + + return _hunks_from_diff(diff_output) + + +def hunks_from_staged_files(): + diff_output = subprocess.check_output(["git", "diff", "HEAD", + "--cached", "--diff-filter=d", "--unified=0"] + ).decode("utf-8").splitlines() + + return _hunks_from_diff(diff_output) + + +def _hunks_from_diff(diff_output): + """ Parse a diff output and extract the hunks of changed files. + The diff output must not have additional lines! + (use --unified=0) """ + + # TARGETS is a list of files with an optional list of hunks, represented as + # pair (start, end) of line numbers, 1 based. + # element of TARGETS: (filename, None) or (filename, [(start,end)]) + target_files = [] + + # hunks_current_list serves as a reference to the hunks list of the + # last added file + hunks_current_list = [] + + for line in diff_output: + file_match = DIFF_FILE_PATT.search(line) + hunk_match = DIFF_HUNK_PATT.search(line) + if file_match: + file_path = os.path.abspath(os.path.join(BUSTUB_DIR, + file_match.group(1))) + + hunks_current_list = [] + if file_path.endswith(".h") or file_path.endswith(".cpp"): + target_files.append((file_path, hunks_current_list)) + # If this file is not .cpp/.h the hunks_current_list reference + # will point to an empty list which will be discarded later + elif hunk_match: + # add entry in the hunk list of the last file + if hunk_match.group(4) is None: + hunk = (int(hunk_match.group(2)), int(hunk_match.group(2))) + else: + hunk = (int(hunk_match.group(2)), int(hunk_match.group(2)) + + int(hunk_match.group(4))) + hunks_current_list.append(hunk) + + return target_files From 50245bdf24876a690479e36e3fc632ff43529e7f Mon Sep 17 00:00:00 2001 From: clundro Date: Sat, 25 Mar 2023 02:28:55 +0800 Subject: [PATCH 2/4] [spam] add asf license. Signed-off-by: clundro --- build_support/helpers.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/build_support/helpers.py b/build_support/helpers.py index a219fdfac..fabd49aa6 100644 --- a/build_support/helpers.py +++ b/build_support/helpers.py @@ -1,4 +1,24 @@ #!/usr/bin/env python3 +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# Modified from the Apache Arrow project for the Terrier project. + """Common helper functions to be used in different Python scripts.""" import difflib import distutils.spawn From 4123448f3ce22d4cbe800a2e3fc3f844e93e6c96 Mon Sep 17 00:00:00 2001 From: clundro Date: Sat, 25 Mar 2023 13:05:58 +0800 Subject: [PATCH 3/4] [fix] use format_header to gen header. Signed-off-by: clundro --- build_support/format_header.py | 256 +++++++++------------------------ build_support/helpers.py | 194 ------------------------- 2 files changed, 71 insertions(+), 379 deletions(-) delete mode 100644 build_support/helpers.py diff --git a/build_support/format_header.py b/build_support/format_header.py index c46071cb3..bd48eee4d 100644 --- a/build_support/format_header.py +++ b/build_support/format_header.py @@ -1,57 +1,48 @@ #!/usr/bin/env python3 # encoding: utf-8 -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# Modified from the Apache Arrow project for the Terrier project. - -"""Format the ill-formatted code.""" -# ============================================== -# GOAL : Format code, Update headers -# ============================================== - -# ===----------------------------------------------------------------------===// -# -# CMU-DB Project (15-445/645) -# ***DO NO SHARE PUBLICLY*** -# -# Identification: src/include/page/b_plus_tree_page.h -# -# Copyright (c) 2023, Carnegie Mellon University Database Group -# -# ===----------------------------------------------------------------------===// +''' template +//===----------------------------------------------------------------------===// +// +// BusTub +// +// rid.h +// +// Identification: src/include/common/rid.h +// +// Copyright (c) 2015-2019, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// +''' +# ref: https://github.com/cmu-db/bustub/blob/master/script/formatting/formatter.py +''' +Usage: example + python format_header.py src/include/storage/page/b_plus_tree_page.h \ + src/include/storage/page/b_plus_tree_leaf_page.h -# ref: https://github.com/cmu-db/bustub/blob/master/script/formatting/formatter.py +then you will get the header in b_plus_tree_page.h. +//===----------------------------------------------------------------------===// +// +// BusTub +// +// b_plus_tree_page.h +// +// Identification: src/include/storage/page/b_plus_tree_page.h +// +// Copyright (c) 2015-2023, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// +similar results insert in b_plus_tree_leaf_page.h. -import argparse -import logging -import os -import re -import sys -import datetime -import subprocess -from functools import reduce -from helpers import CLANG_FORMAT, BUSTUB_DIR, CLANG_FORMAT_FILE, LOG,\ - clang_format, hunks_from_staged_files, hunks_from_last_commits +''' +import os +import datetime +import sys +BUSTUB_DIR = os.path.abspath(os.path.dirname( + __file__)).replace('/build_support', '') BUSTUB_SRC_DIR = os.path.join(BUSTUB_DIR, "src") BUSTUB_TESTS_DIR = os.path.join(BUSTUB_DIR, "test") @@ -60,163 +51,58 @@ DEFAULT_DIRS.append(BUSTUB_SRC_DIR) DEFAULT_DIRS.append(BUSTUB_TESTS_DIR) + # header framework, dynamic information will be added inside function header_comment_line_1 = "//===----------------------------------------------------------------------===//\n" header_comment_line_1 += "//\n" -header_comment_line_1 += "// CMU-DB Project (15-445/645)\n" -header_comment_line_2 = "// ***DO NO SHARE PUBLICLY***\n" +header_comment_line_1 += "// BusTub\n" +header_comment_line_2 = "//\n" header_comment_line_3 = "// " header_comment_line_4 = "//\n" header_comment_line_5 = "// Identification: " header_comment_line_6 = "//\n" -header_comment_line_7 = "// Copyright (c) %d, Carnegie Mellon University Database Group\n" % datetime.datetime.now().year +header_comment_line_7 = "// Copyright (c) 2015-%d, Carnegie Mellon University Database Group\n" % datetime.datetime.now().year header_comment_line_8 = "//\n" header_comment_line_9 = "//===----------------------------------------------------------------------===//\n\n" -header_comment_1 = header_comment_line_1 + header_comment_line_2 -header_comment_3 = header_comment_line_4 -header_comment_5 = header_comment_line_6 + header_comment_line_7 \ - + header_comment_line_8 + header_comment_line_9 +header_comment_line_6 = header_comment_line_6 + \ + header_comment_line_7+header_comment_line_8+header_comment_line_9 -HEADER_REGEX = re.compile( - r"((\/\/===-*===\/\/\n(\/\/.*\n)*\/\/===-*===\/\/[\n]*)\n\n)*") +header_comment_line_1 += header_comment_line_2 -# ============================================== -# UTILITY FUNCTION DEFINITIONS -# ============================================== +def add_file_header(file: str): + """add header to this file.""" + if not file.endswith('.h'): + return + file_path = os.path.join(BUSTUB_DIR, file) + if not os.path.isfile(file_path): + return -def format_file(file_path, file_hunks, update_header, clang_format_code): - """Formats the file passed as argument.""" - file_name = os.path.basename(file_path) - abs_path = os.path.abspath(file_path) - rel_path_from_bustub_dir = os.path.relpath(abs_path, BUSTUB_DIR) + dir_flag = False + for deafult_dir in DEFAULT_DIRS: + if file_path.startswith(deafult_dir): + dir_flag = True + break + if not dir_flag: + return - with open(file_path, "r+") as file: - file_data = file.read() + file_name = os.path.basename(file) - if update_header: - # strip old header if it exists - header_match = HEADER_REGEX.match(file_data) - if not header_match is None: - LOG.info("Strip header from %s", file_name) - header_comment = header_match.group() - LOG.debug("Header comment : %s", header_comment) - file_data = file_data.replace(header_comment, "") + header_comment_filename = header_comment_line_3+file_name+'\n' + header_comment_relpath = header_comment_line_5+file + '\n' + header_commnt = header_comment_line_1 + header_comment_filename + \ + header_comment_line_4+header_comment_relpath+header_comment_line_6 - # add new header - LOG.info("Add header to %s", file_name) - header_comment_2 = header_comment_line_3 + file_name + "\n" - header_comment_4 = header_comment_line_5\ - + rel_path_from_bustub_dir + "\n" - header_comment = header_comment_1 + header_comment_2 \ - + header_comment_3 + header_comment_4 \ - + header_comment_5 - # print header_comment + with open(file_path, 'r+') as f: + # maybe use sed -i '' file + old = f.read() + f.seek(0) + f.write(header_commnt) + f.write(old) - file_data = header_comment + file_data - - file.seek(0, 0) - file.truncate() - file.write(file_data) - - elif clang_format_code: - clang_format(file_path, file_hunks) - - # END WITH -# END FORMAT__FILE(FILE_NAME) - - -def format_dir(dir_path, update_header, clang_format_code): - """Formats all the files in the dir passed as argument.""" - for subdir, _, files in os.walk(dir_path): # _ is for directories. - for file in files: - # print os.path.join(subdir, file) - file_path = subdir + os.path.sep + file - - if file_path.endswith(".h") or file_path.endswith(".cpp"): - format_file(file_path, None, update_header, clang_format_code) - # END IF - # END FOR [file] - # END FOR [os.walk] -# END ADD_HEADERS_DIR(DIR_PATH) - - -# ============================================== -# Main Function -# ============================================== if __name__ == '__main__': - - PARSER = argparse.ArgumentParser( - description='Update headers and/or format source code' - ) - - PARSER.add_argument( - "-u", "--update-header", - help='Action: Update existing headers or add new ones', - action='store_true' - ) - PARSER.add_argument( - "-c", "--clang-format-code", - help='Action: Apply clang-format to source code', - action='store_true' - ) - PARSER.add_argument( - "-f", "--staged-files", - help='Action: Apply the selected action(s) to all staged files (git). ' + - '(clang-format will only touch the staged lines)', - action='store_true' - ) - PARSER.add_argument( - "-n", "--number-commits", - help='Action: Apply the selected action(s) to all changes of the last ' + - ' commits (clang-format will only touch the changed lines)', - type=int, default=0 - ) - PARSER.add_argument( - 'paths', metavar='PATH', type=str, nargs='*', - help='Files or directories to (recursively) apply the actions to' - ) - - ARGS = PARSER.parse_args() - - # TARGETS is a list of files with an optional list of hunks, represented as - # pair (start, end) of line numbers, 1 based. - # element of TARGETS: (filename, None) or (filename, [(start,end)]) - - if ARGS.staged_files: - TARGETS = hunks_from_staged_files() - - if not TARGETS: - LOG.error( - "no staged files or not calling from a repository -- exiting" - ) - sys.exit("no staged files or not calling from a repository") - - elif ARGS.number_commits > 0: - TARGETS = hunks_from_last_commits(ARGS.number_commits) - - if not TARGETS: - LOG.error( - "no changes could be extracted for formatting -- exiting" - ) - sys.exit("no changes could be extracted for formatting") - - elif not ARGS.paths: - LOG.error("no files or directories given -- exiting") - sys.exit("no files or directories given") - - else: - TARGETS = [(f, None) for f in ARGS.paths] - - for f, hunks in TARGETS: - if os.path.isfile(f): - LOG.info("Scanning file: %s", f) - format_file(f, hunks, ARGS.update_header, ARGS.clang_format_code) - elif os.path.isdir(f): - LOG.info("Scanning directory %s", f) - format_dir(f, ARGS.update_header, ARGS.clang_format_code) - # FOR -# IF + for file in sys.argv[1:]: + add_file_header(file) diff --git a/build_support/helpers.py b/build_support/helpers.py deleted file mode 100644 index fabd49aa6..000000000 --- a/build_support/helpers.py +++ /dev/null @@ -1,194 +0,0 @@ -#!/usr/bin/env python3 -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# Modified from the Apache Arrow project for the Terrier project. - -"""Common helper functions to be used in different Python scripts.""" -import difflib -import distutils.spawn -import logging -import os -import subprocess -import re - -from functools import reduce - -CODE_SOURCE_DIR = os.path.abspath(os.path.dirname(__file__)) -BUSTUB_DIR = CODE_SOURCE_DIR.replace('/build_support', '') -CLANG_FORMAT_FILE = os.path.join(BUSTUB_DIR, ".clang-format") - -FORMATTING_FILE_WHITELIST = [ - # Fill me -] - -DIFF_FILE_PATT = re.compile(r'^\+\+\+ b\/(.*)') -DIFF_HUNK_PATT = re.compile(r'^@@ \-\d+(,\d+)? \+(\d+)(,)?(\d+)? @@.*') - -# ============================================== -# LOGGING CONFIGURATION -# ============================================== - -LOG = logging.getLogger(__name__) -LOG_HANDLER = logging.StreamHandler() -LOG_FORMATTER = logging.Formatter( - fmt='%(asctime)s [%(funcName)s:%(lineno)03d] %(levelname)-5s: %(message)s', - datefmt='%m-%d-%Y %H:%M:%S' -) -LOG_HANDLER.setFormatter(LOG_FORMATTER) -LOG.addHandler(LOG_HANDLER) -LOG.setLevel(logging.INFO) - - -def find_clangformat(): - """Finds appropriate clang-format executable.""" - # check for possible clang-format versions - path = "" - for exe in ["clang-format", "clang-format-13", "clang-format-14"]: - path = distutils.spawn.find_executable(exe) - if not path is None: - break - return path - - -CLANG_FORMAT = find_clangformat() -CLANG_COMMAND_PREFIX = [CLANG_FORMAT, "-style=file"] - - -def clang_check(file_path, hunks=None): - """Checks and reports bad code formatting.""" - - assert not file_path is None and not file_path == "" - - rel_path_from_peloton_dir = os.path.relpath(file_path, BUSTUB_DIR) - - if rel_path_from_peloton_dir in FORMATTING_FILE_WHITELIST: - return True - - file_status = True - - # Run clang-format on the file and get output (not inline!) - formatted_src = clang_format(file_path, None, inline=False) - - # For Python 3, the above command gives a list of binary sequences, each - # of which has to be converted to string for diff to operate correctly. - # Otherwise, strings would be compared with binary sequences and there - # will always be a big difference. - formatted_src = [line.decode('utf-8') for line in formatted_src] - # Load source file - with open(file_path, "r") as file: - src = file.readlines() - - # Do the diff - difference = difflib.Differ() - diff = difference.compare(src, formatted_src) - line_num = 0 - for line in diff: - code = line[:2] - if code in (" ", "- "): - line_num += 1 - if code == '- ': - if file_status: - LOG.info("Invalid formatting in file : " + file_path) - LOG.info("Line %d: %s", line_num, line[2:].strip()) - file_status = False - - return file_status - - -def clang_format(file_path, hunks=None, inline=True): - """Formats the file at file_path. - 'hunks' can be a list of pairs with (start,end) line numbers, 1 based. - """ - - assert not file_path is None and not file_path == "" - - if CLANG_FORMAT is None: - LOG.error("clang-format seems not installed") - exit() - - formatting_command = CLANG_COMMAND_PREFIX + [file_path] - - if inline: - formatting_command.append("-i") - - if not hunks is None: - for start, end in hunks: - if start > 0 and end > 0: - formatting_command.append("-lines={}:{}".format(start, end)) - - LOG.info(' '.join(formatting_command)) - output = subprocess.check_output(formatting_command).splitlines(True) - return output - - -def hunks_from_last_commits(n): - """ Extract hunks of the last n commits. """ - - assert n > 0 - - diff_output = subprocess.check_output(["git", "diff", "HEAD~"+str(n), "--diff-filter=d", "--unified=0"] - ).decode("utf-8").splitlines() - - return _hunks_from_diff(diff_output) - - -def hunks_from_staged_files(): - diff_output = subprocess.check_output(["git", "diff", "HEAD", - "--cached", "--diff-filter=d", "--unified=0"] - ).decode("utf-8").splitlines() - - return _hunks_from_diff(diff_output) - - -def _hunks_from_diff(diff_output): - """ Parse a diff output and extract the hunks of changed files. - The diff output must not have additional lines! - (use --unified=0) """ - - # TARGETS is a list of files with an optional list of hunks, represented as - # pair (start, end) of line numbers, 1 based. - # element of TARGETS: (filename, None) or (filename, [(start,end)]) - target_files = [] - - # hunks_current_list serves as a reference to the hunks list of the - # last added file - hunks_current_list = [] - - for line in diff_output: - file_match = DIFF_FILE_PATT.search(line) - hunk_match = DIFF_HUNK_PATT.search(line) - if file_match: - file_path = os.path.abspath(os.path.join(BUSTUB_DIR, - file_match.group(1))) - - hunks_current_list = [] - if file_path.endswith(".h") or file_path.endswith(".cpp"): - target_files.append((file_path, hunks_current_list)) - # If this file is not .cpp/.h the hunks_current_list reference - # will point to an empty list which will be discarded later - elif hunk_match: - # add entry in the hunk list of the last file - if hunk_match.group(4) is None: - hunk = (int(hunk_match.group(2)), int(hunk_match.group(2))) - else: - hunk = (int(hunk_match.group(2)), int(hunk_match.group(2)) + - int(hunk_match.group(4))) - hunks_current_list.append(hunk) - - return target_files From 4c53b880ccc8bf6d485495fddaf900c82a9d20da Mon Sep 17 00:00:00 2001 From: clundro Date: Sat, 25 Mar 2023 13:11:46 +0800 Subject: [PATCH 4/4] [spam] delete redundant info. Signed-off-by: clundro --- build_support/format_header.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/build_support/format_header.py b/build_support/format_header.py index bd48eee4d..46ca567d9 100644 --- a/build_support/format_header.py +++ b/build_support/format_header.py @@ -1,18 +1,6 @@ #!/usr/bin/env python3 # encoding: utf-8 -''' template -//===----------------------------------------------------------------------===// -// -// BusTub -// -// rid.h -// -// Identification: src/include/common/rid.h -// -// Copyright (c) 2015-2019, Carnegie Mellon University Database Group -// -//===----------------------------------------------------------------------===// -''' + # ref: https://github.com/cmu-db/bustub/blob/master/script/formatting/formatter.py '''