#!/usr/bin/env python3
""" The Python Hipify script.
##
# Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
#               2017-2018 Advanced Micro Devices, Inc. and
#                         Facebook Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
"""
import argparse
import fnmatch
import re
import shutil
import sys
import os

from . import constants
from .cuda_to_hip_mappings import CUDA_TO_HIP_MAPPINGS
from .cuda_to_hip_mappings import MATH_TRANSPILATIONS

from typing import Dict, List, Iterator, Optional
from collections.abc import Mapping, Iterable

HipifyResult = Dict[str, Optional[str]]
HipifyFinalResult = Dict[str, HipifyResult]
HIPIFY_C_BREADCRUMB = "// !!! This is a file automatically generated by hipify!!!\n"
HIPIFY_FINAL_RESULT: HipifyFinalResult = {}
# Hardcode the PyTorch template map
"""This dictionary provides the mapping from PyTorch kernel template types
to their actual types."""
PYTORCH_TEMPLATE_MAP = {"Dtype": "scalar_t", "T": "scalar_t"}

__all__ = ['InputError', 'openf', 'bcolors', 'GeneratedFileCleaner', 'match_extensions', 'matched_files_iter',
           'preprocess_file_and_save_result', 'compute_stats', 'add_dim3', 'processKernelLaunches', 'find_closure_group',
           'find_bracket_group', 'find_parentheses_group', 'replace_math_functions', 'hip_header_magic',
           'replace_extern_shared', 'get_hip_file_path', 'is_out_of_place', 'is_pytorch_file', 'is_cusparse_file',
           'is_caffe2_gpu_file', 'Trie', 'preprocessor', 'file_specific_replacement', 'file_add_header',
           'fix_static_global_kernels', 'extract_arguments', 'str2bool', 'hipify']

class InputError(Exception):
    # Exception raised for errors in the input.

    def __init__(self, message):
        super().__init__(message)
        self.message = message

    def __str__(self):
        return "{}: {}".format("Input error", self.message)

def openf(filename, mode):
    return open(filename, mode, errors='ignore')

# Color coding for printing
class bcolors:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

# To the programmer, the output of hipify mostly consists of intermediates.
# This class allows users of hipify to request a cleanup by running hipify
# and the compilation inside a `with` block that instantiates this context
# manager with keep_intermediates=False.
# The main use case is cpp_extensions, specifically the load method.
# It is a good idea to keep intermediates (in case of errors, or to avoid
# recompiling unchanged files), but in cases where you don't want to keep
# them (e.g. in CI), this can be used to remove the generated files.
class GeneratedFileCleaner:
    """Context Manager to clean up generated files"""
    def __init__(self, keep_intermediates=False):
        self.keep_intermediates = keep_intermediates
        self.files_to_clean = set()
        self.dirs_to_clean = []

    def __enter__(self):
        return self

    def open(self, fn, *args, **kwargs):
        if not os.path.exists(fn):
            self.files_to_clean.add(os.path.abspath(fn))
        return open(fn, *args, **kwargs)

    def makedirs(self, dn, exist_ok=False):
        parent, n = os.path.split(dn)
        if not n:
            parent, n = os.path.split(parent)
        if parent and n and not os.path.exists(parent):
            self.makedirs(parent, exist_ok=True)
        if not os.path.isdir(dn) or not exist_ok:
            os.mkdir(dn)
            self.dirs_to_clean.append(os.path.abspath(dn))

    def __exit__(self, type, value, traceback):
        if not self.keep_intermediates:
            for f in self.files_to_clean:
                os.unlink(f)
            for d in self.dirs_to_clean[::-1]:
                os.rmdir(d)
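
# A minimal usage sketch (hypothetical paths, not part of the original file):
#
#     with GeneratedFileCleaner(keep_intermediates=False) as clean_ctx:
#         clean_ctx.makedirs("build/hip", exist_ok=True)
#         with clean_ctx.open("build/hip/foo_hip.cpp", "w") as f:
#             f.write("// generated")
#     # On __exit__, the files and directories created through clean_ctx are removed.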

def match_extensions(filename: str, extensions: Iterable) -> bool:
    """Helper method to see if filename ends with certain extension"""
    return any(filename.endswith(e) for e in extensions)


def _fnmatch(filepath, patterns):
    return any(fnmatch.fnmatch(filepath, pattern) for pattern in patterns)

def matched_files_iter(
        root_path: str,
        includes: Iterable = (),
        ignores: Iterable = (),
        extensions: Iterable = (),
        out_of_place_only: bool = False,
        is_pytorch_extension: bool = False) -> Iterator[str]:

    exact_matches = set(includes)

    # This is a very rough heuristic; really, we want to avoid scanning
    # any file which is not checked into source control, but this script
    # needs to work even if you're in a Git or Hg checkout, so it is easier
    # to just block the biggest time sinks that won't matter in the end.
    for (abs_dirpath, dirs, filenames) in os.walk(root_path, topdown=True):
        rel_dirpath = os.path.relpath(abs_dirpath, root_path)
        if rel_dirpath == '.':
            # dirs.remove is O(n) in the number of entries, but the top-level
            # directory listing is small, so this is cheap enough.
            if ".git" in dirs:
                dirs.remove(".git")
            if "build" in dirs:
                dirs.remove("build")
            if "third_party" in dirs:
                dirs.remove("third_party")
                dirs.append("third_party/nvfuser")
        for filename in filenames:
            filepath = os.path.join(abs_dirpath, filename)
            rel_filepath = os.path.join(rel_dirpath, filename)
            # We respect extensions, UNLESS you wrote the entire
            # filename verbatim, in which case we always accept it
            if (
                _fnmatch(filepath, includes)
                and (not _fnmatch(filepath, ignores))
                and (match_extensions(filepath, extensions) or filepath in exact_matches)
            ):
                if not is_pytorch_extension:  # for pytorch extensions, consider all files
                    if not is_pytorch_file(rel_filepath) and not is_caffe2_gpu_file(rel_filepath):
                        continue
                    if out_of_place_only and not is_out_of_place(rel_filepath):
                        continue
                yield filepath
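
# A small usage sketch (hypothetical arguments, not part of the original file):
#
#     for fp in matched_files_iter("/src/my_project", includes=("*",),
#                                  extensions=(".cu", ".cuh", ".h")):
#         print(fp)
#
# This walks /src/my_project, skips .git/build/third_party at the top level, and
# yields paths that match the include patterns and extensions.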

def preprocess_file_and_save_result(
        output_directory: str,
        filepath: str,
        all_files: Iterable,
        header_include_dirs: Iterable,
        stats: Dict[str, List],
        hip_clang_launch: bool,
        is_pytorch_extension: bool,
        clean_ctx: GeneratedFileCleaner,
        show_progress: bool) -> None:
    result = preprocessor(output_directory, filepath, all_files, header_include_dirs, stats,
                          hip_clang_launch, is_pytorch_extension, clean_ctx, show_progress)
    fin_path = os.path.abspath(os.path.join(output_directory, filepath))
    # Show what happened
    if show_progress and "ignored" not in str(result["status"]):
        print(
            fin_path, "->",
            result["hipified_path"], result["status"], flush=True)

    HIPIFY_FINAL_RESULT[fin_path] = result

def compute_stats(stats):
    unsupported_calls = {cuda_call for (cuda_call, _filepath) in stats["unsupported_calls"]}

    # Print the number of unsupported calls
    print("Total number of unsupported CUDA function calls: {0:d}".format(len(unsupported_calls)))

    # Print the list of unsupported calls
    print(", ".join(unsupported_calls))

    # Print the number of kernel launches
    print("\nTotal number of replaced kernel launches: {0:d}".format(len(stats["kernel_launches"])))

def add_dim3(kernel_string, cuda_kernel):
    '''Adds dim3() around the grid and block arguments of the kernel launch
    (they become the second and third arguments of hipLaunchKernelGGL).'''
    count = 0
    closure = 0
    kernel_string = kernel_string.replace("<<<", "").replace(">>>", "")
    arg_locs: List[Dict[str, int]] = [{} for _ in range(2)]
    arg_locs[count]['start'] = 0
    for ind, c in enumerate(kernel_string):
        if count > 1:
            break
        if c == "(":
            closure += 1
        elif c == ")":
            closure -= 1
        if (c == "," or ind == len(kernel_string) - 1) and closure == 0:
            arg_locs[count]['end'] = ind + (c != ",")
            count += 1
            if count < 2:
                arg_locs[count]['start'] = ind + 1

    first_arg_raw = kernel_string[arg_locs[0]['start']:arg_locs[0]['end'] + 1]
    second_arg_raw = kernel_string[arg_locs[1]['start']:arg_locs[1]['end']]

    first_arg_clean = kernel_string[arg_locs[0]['start']:arg_locs[0]['end']].replace("\n", "").strip(" ")
    second_arg_clean = kernel_string[arg_locs[1]['start']:arg_locs[1]['end']].replace("\n", "").strip(" ")

    first_arg_dim3 = "dim3({})".format(first_arg_clean)
    second_arg_dim3 = "dim3({})".format(second_arg_clean)

    first_arg_raw_dim3 = first_arg_raw.replace(first_arg_clean, first_arg_dim3)
    second_arg_raw_dim3 = second_arg_raw.replace(second_arg_clean, second_arg_dim3)
    cuda_kernel = cuda_kernel.replace(first_arg_raw + second_arg_raw, first_arg_raw_dim3 + second_arg_raw_dim3)
    return cuda_kernel
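
# Illustrative example (assumed inputs, not taken from the original file):
#     add_dim3("<<<grid, block, 0, stream>>>", "my_kernel<<<grid, block, 0, stream>>>(")
# returns "my_kernel<<<dim3(grid), dim3(block), 0, stream>>>(". Only the grid and
# block arguments are wrapped; the shared-memory size and stream are left alone.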

RE_KERNEL_LAUNCH = re.compile(r'([ ]+)(detail?)::[ ]+\\\n[ ]+')


def processKernelLaunches(string, stats):
    """ Replace the CUDA style Kernel launches with the HIP style kernel launches."""
    # Concat the namespace with the kernel names. (Find cleaner way of doing this later).
    string = RE_KERNEL_LAUNCH.sub(lambda inp: "{0}{1}::".format(inp.group(1), inp.group(2)), string)

    def grab_method_and_template(in_kernel):
        # The positions for relevant kernel components.
        pos = {
            "kernel_launch": {"start": in_kernel["start"], "end": in_kernel["end"]},
            "kernel_name": {"start": -1, "end": -1},
            "template": {"start": -1, "end": -1}
        }

        # Count for balancing template
        count = {"<>": 0}

        # Status for whether we are parsing a certain item.
        START = 0
        AT_TEMPLATE = 1
        AFTER_TEMPLATE = 2
        AT_KERNEL_NAME = 3

        status = START

        # Parse the string character by character
        for i in range(pos["kernel_launch"]["start"] - 1, -1, -1):
            char = string[i]

            # Handle Templating Arguments
            if status == START or status == AT_TEMPLATE:
                if char == ">":
                    if status == START:
                        status = AT_TEMPLATE
                        pos["template"]["end"] = i
                    count["<>"] += 1

                if char == "<":
                    count["<>"] -= 1
                    if count["<>"] == 0 and (status == AT_TEMPLATE):
                        pos["template"]["start"] = i
                        status = AFTER_TEMPLATE

            # Handle Kernel Name
            if status != AT_TEMPLATE:
                if string[i].isalnum() or string[i] in {'(', ')', '_', ':', '#'}:
                    if status != AT_KERNEL_NAME:
                        status = AT_KERNEL_NAME
                        pos["kernel_name"]["end"] = i

                    # Case: Kernel name starts the string.
                    if i == 0:
                        pos["kernel_name"]["start"] = 0

                        # Finished
                        return [(pos["kernel_name"]), (pos["template"]), (pos["kernel_launch"])]

                else:
                    # Potential ending point if we're already traversing a kernel's name.
                    if status == AT_KERNEL_NAME:
                        pos["kernel_name"]["start"] = i

                        # Finished
                        return [(pos["kernel_name"]), (pos["template"]), (pos["kernel_launch"])]
    def find_kernel_bounds(string):
        """Finds the starting and ending points for all kernel launches in the string."""
        kernel_end = 0
        kernel_positions = []

        # Continue until we cannot find any more kernels.
        while string.find("<<<", kernel_end) != -1:
            # Get kernel starting position (starting from the previous ending point)
            kernel_start = string.find("<<<", kernel_end)

            # Get kernel ending position (adjust end point past the >>>)
            kernel_end = string.find(">>>", kernel_start) + 3
            if kernel_end <= 0:
                raise InputError("no kernel end found")

            # Add to list of traversed kernels
            kernel_positions.append({"start": kernel_start, "end": kernel_end,
                                     "group": string[kernel_start: kernel_end]})
        return kernel_positions
    # Replace comments and string literals from the code so that find_kernel_bounds does not
    # wrongly capture kernels in comments and string literals.
    # This function replaces them with "x" to keep positions.
    def mask_comments(string):
        in_comment = ''
        prev_c = ''
        new_string = ''
        for c in string:
            if in_comment == '':
                # Outside comments
                if c == '/' and prev_c == '/':
                    in_comment = '//'
                elif c == '*' and prev_c == '/':
                    in_comment = '/*'
                elif c == '"' and prev_c != '\\' and prev_c != "'":
                    in_comment = '"'
            elif in_comment == '//':
                # In // xxx
                if c == '\r' or c == '\n':
                    in_comment = ''
            elif in_comment == '/*':
                # In /* xxx */
                if c == '/' and prev_c == '*':
                    in_comment = ''
            elif in_comment == '"':
                # In ""
                if c == '"' and prev_c != '\\':
                    in_comment = ''
            prev_c = c

            if in_comment == '':
                new_string += c
            else:
                new_string += 'x'
        return new_string
    # Grab positional ranges of all kernel launches
    get_kernel_positions = list(find_kernel_bounds(mask_comments(string)))
    output_string = string

    # Replace each CUDA kernel with a HIP kernel.
    for kernel in get_kernel_positions:
        # Get kernel components
        params = grab_method_and_template(kernel)

        # Find parenthesis after kernel launch
        parenthesis = string.find("(", kernel["end"])

        # Extract cuda kernel
        cuda_kernel = string[params[0]["start"]:parenthesis + 1]
        kernel_string = string[kernel['start']:kernel['end']]
        end_param_index = 0 if params[1]['end'] == -1 else 1
        kernel_name_with_template = string[params[0]['start']:params[end_param_index]['end'] + 1]
        cuda_kernel_dim3 = add_dim3(kernel_string, cuda_kernel)

        # Keep the number of kernel launch params consistent (grid dims, block dims, dynamic shared size, stream)
        num_klp = len(extract_arguments(0, kernel["group"].replace("<<<", "(").replace(">>>", ")")))

        hip_kernel = "hipLaunchKernelGGL(" + cuda_kernel_dim3[0:-1].replace(
            ">>>", ", 0" * (4 - num_klp) + ">>>").replace("<<<", ", ").replace(
            ">>>", ", ").replace(kernel_name_with_template, "(" + kernel_name_with_template + ")")

        # Replace cuda kernel with hip kernel
        output_string = output_string.replace(cuda_kernel, hip_kernel)

        # Update the statistics
        stats["kernel_launches"].append(hip_kernel)

    return output_string
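
# Illustrative before/after (assumed snippet, not taken from the original file):
#     my_kernel<scalar_t><<<grid, block, 0, stream>>>(arg);
# becomes, roughly,
#     hipLaunchKernelGGL((my_kernel<scalar_t>), dim3(grid), dim3(block), 0, stream, arg);
# Launches with fewer than four <<<>>> parameters are padded with ", 0" so that
# hipLaunchKernelGGL always receives the shared-memory and stream arguments.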

def find_closure_group(input_string, start, group):
    """Generalization for finding a balancing closure group

         if group = ["(", ")"], then finds the first balanced parentheses.
         if group = ["{", "}"], then finds the first balanced bracket.

    Given an input string, a starting position in the input string, and the group type,
    find_closure_group returns the positions of group[0] and group[1] as a tuple.

    Example:
        >>> find_closure_group("(hi)", 0, ["(", ")"])
        (0, 3)
    """
    inside_parenthesis = False
    parens = 0
    pos = start
    p_start, p_end = -1, -1

    while pos < len(input_string):
        if input_string[pos] == group[0]:
            if inside_parenthesis is False:
                inside_parenthesis = True
                parens = 1
                p_start = pos
            else:
                parens += 1
        elif input_string[pos] == group[1] and inside_parenthesis:
            parens -= 1

            if parens == 0:
                p_end = pos
                return p_start, p_end

        pos += 1
    return None, None

def find_bracket_group(input_string, start):
    """Finds the first balanced pair of curly brackets."""
    return find_closure_group(input_string, start, group=["{", "}"])


def find_parentheses_group(input_string, start):
    """Finds the first balanced pair of parentheses."""
    return find_closure_group(input_string, start, group=["(", ")"])
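
# For example (illustrative only):
#     find_parentheses_group("foo(a, b)", 0)            # -> (3, 8)
#     find_bracket_group("int f() { return 0; }", 0)    # -> (8, 20)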

RE_ASSERT = re.compile(r"\bassert[ ]*\(")


def replace_math_functions(input_string):
    """FIXME: Temporarily replace std:: invocations of math functions
        with non-std:: versions to prevent linker errors.
        NOTE: This can lead to correctness issues when running tests, since the
        correct version of the math function (exp/expf) might not get called.
        The plan is to remove this function once HIP supports
        std:: math function calls inside device code.
    """
    output_string = input_string
    for func in MATH_TRANSPILATIONS:
        output_string = output_string.replace(r'{}('.format(func), '{}('.format(MATH_TRANSPILATIONS[func]))

    return output_string

RE_SYNCTHREADS = re.compile(r":?:?\b(__syncthreads)\b(\w*\()")


def hip_header_magic(input_string):
    """If the file makes kernel builtin calls and does not include the cuda_runtime.h header,
    then automatically add an #include to match the "magic" includes provided by NVCC.
    TODO:
        Update logic to ignore cases where the cuda_runtime.h is included by another file.
    """
    # Copy the input.
    output_string = input_string

    # Check if one of the following headers is already included.
    headers = ["hip/hip_runtime.h", "hip/hip_runtime_api.h"]
    if any(re.search(r'#include ("{0}"|<{0}>)'.format(ext), output_string) for ext in headers):
        return output_string

    # Rough logic to detect if we're inside device code
    hasDeviceLogic: int
    hasDeviceLogic = "hipLaunchKernelGGL" in output_string
    hasDeviceLogic += "__global__" in output_string
    hasDeviceLogic += "__shared__" in output_string
    hasDeviceLogic += RE_SYNCTHREADS.search(output_string) is not None

    # If device logic found, provide the necessary header.
    if hasDeviceLogic:
        output_string = '#include "hip/hip_runtime.h"\n' + input_string

    return output_string

RE_EXTERN_SHARED = re.compile(r"extern\s+([\w\(\)]+)?\s*__shared__\s+([\w:<>\s]+)\s+(\w+)\s*\[\s*\]\s*;")


def replace_extern_shared(input_string):
    """Match extern __shared__ type foo[]; syntax and use HIP_DYNAMIC_SHARED() MACRO instead.
    https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_kernel_language.md#__shared__
    Example:
        "extern __shared__ char smemChar[];" => "HIP_DYNAMIC_SHARED( char, smemChar)"
        "extern __shared__ unsigned char smem[];" => "HIP_DYNAMIC_SHARED( unsigned char, smem)"
    """
    output_string = input_string
    output_string = RE_EXTERN_SHARED.sub(
        lambda inp: "HIP_DYNAMIC_SHARED({0} {1}, {2})".format(
            inp.group(1) or "", inp.group(2), inp.group(3)), output_string)

    return output_string

def get_hip_file_path(rel_filepath, is_pytorch_extension=False):
    """
    Returns the new name of the hipified file
    """
    # At the moment, some PyTorch source files are HIPified in place. The predicate
    # is_out_of_place tells us if this is the case or not.
    assert not os.path.isabs(rel_filepath)
    if not is_pytorch_extension and not is_out_of_place(rel_filepath):
        return rel_filepath

    dirpath, filename = os.path.split(rel_filepath)
    root, ext = os.path.splitext(filename)

    # Here's the plan:
    #
    # In general, we need to disambiguate the HIPified filename so that
    # it gets a different name from the original filename, so
    # that we don't overwrite the original file.
    #
    # There are a lot of different naming conventions across PyTorch
    # and Caffe2, but the general recipe is to convert occurrences
    # of cuda/gpu to hip, and add hip if there are no occurrences
    # of cuda/gpu anywhere.
    #
    # Concretely, we do the following:
    #
    #   - If there is a directory component named "cuda", replace
    #     it with "hip", AND
    #
    #   - If the file name contains "CUDA", replace it with "HIP", AND
    #
    #   - ALWAYS replace '.cu' with '.hip', because those files
    #     contain CUDA kernels that need to be hipified and processed with
    #     the hip compiler
    #
    #   - If we are not hipifying a PyTorch extension, and the parent
    #     directory name did not change as a result of the above
    #     transformations, insert "hip" in the file path
    #     as the direct parent folder of the file
    #
    #   - If we are hipifying a PyTorch extension, and the parent directory
    #     name as well as the filename (incl. extension) did not change as
    #     a result of the above transformations, insert "_hip" in the filename
    #
    # This isn't set in stone; we might adjust this to support other
    # naming conventions.
    if ext == '.cu':
        ext = '.hip'

    orig_filename = filename
    orig_dirpath = dirpath

    dirpath = dirpath.replace('cuda', 'hip')
    dirpath = dirpath.replace('CUDA', 'HIP')
    dirpath = dirpath.replace('THC', 'THH')

    root = root.replace('cuda', 'hip')
    root = root.replace('CUDA', 'HIP')
    # Special case to handle caffe2/core/THCCachingAllocator
    if dirpath != "caffe2/core":
        root = root.replace('THC', 'THH')

    if not is_pytorch_extension and dirpath == orig_dirpath:
        dirpath = os.path.join(dirpath, 'hip')

    if is_pytorch_extension and dirpath == orig_dirpath and (root + ext) == orig_filename:
        root = root + "_hip"

    return os.path.join(dirpath, root + ext)
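
# Illustrative examples of the resulting paths (not an exhaustive list):
#     aten/src/ATen/native/cuda/Foo.cu -> aten/src/ATen/native/hip/Foo.hip
#     aten/src/ATen/cuda/CUDAContext.h -> aten/src/ATen/hip/HIPContext.h
#     caffe2/utils/math_gpu.cu         -> caffe2/utils/hip/math_gpu.hip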

def is_out_of_place(rel_filepath):
    assert not os.path.isabs(rel_filepath)
    if rel_filepath.startswith("torch/"):
        return False
    if rel_filepath.startswith("third_party/nvfuser/"):
        return False
    if rel_filepath.startswith("tools/autograd/templates/"):
        return False
    return True


# Keep this synchronized with includes/ignores in build_amd.py
def is_pytorch_file(rel_filepath):
    assert not os.path.isabs(rel_filepath)
    if rel_filepath.startswith("aten/"):
        if rel_filepath.startswith("aten/src/ATen/core/"):
            return False
        return True
    if rel_filepath.startswith("torch/"):
        return True
    if rel_filepath.startswith("third_party/nvfuser/"):
        return True
    if rel_filepath.startswith("tools/autograd/templates/"):
        return True
    return False

def is_cusparse_file(rel_filepath):
    if is_pytorch_file(rel_filepath):
        return "sparse" in rel_filepath.lower()
    return False


def is_caffe2_gpu_file(rel_filepath):
    assert not os.path.isabs(rel_filepath)
    if rel_filepath.startswith("c10/cuda"):
        return True
    filename = os.path.basename(rel_filepath)
    _, ext = os.path.splitext(filename)
    return ('gpu' in filename or ext in ['.cu', '.cuh']) and ('cudnn' not in filename)
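
# For example (illustrative only):
#     is_caffe2_gpu_file("caffe2/operators/relu_op.cu")        # True  (.cu extension)
#     is_caffe2_gpu_file("caffe2/operators/conv_op_cudnn.cc")  # False (cudnn file, .cc)
#     is_caffe2_gpu_file("c10/cuda/CUDAStream.cpp")            # True  (c10/cuda prefix)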

# Cribbed from https://stackoverflow.com/questions/42742810/speed-up-millions-of-regex-replacements-in-python-3/42789508#42789508
class Trie():
    """Regex::Trie in Python. Creates a Trie out of a list of words. The trie can be exported to a Regex pattern.
    The corresponding Regex should match much faster than a simple Regex union."""

    def __init__(self):
        self.data = {}

    def add(self, word):
        ref = self.data
        for char in word:
            ref[char] = char in ref and ref[char] or {}
            ref = ref[char]
        ref[''] = 1

    def dump(self):
        return self.data

    def quote(self, char):
        return re.escape(char)

    def _pattern(self, pData):
        data = pData
        if "" in data and len(data.keys()) == 1:
            return None

        alt = []
        cc = []
        q = 0
        for char in sorted(data.keys()):
            if isinstance(data[char], dict):
                try:
                    recurse = self._pattern(data[char])
                    alt.append(self.quote(char) + recurse)
                except Exception:
                    cc.append(self.quote(char))
            else:
                q = 1
        cconly = not len(alt) > 0

        if len(cc) > 0:
            if len(cc) == 1:
                alt.append(cc[0])
            else:
                alt.append('[' + ''.join(cc) + ']')

        if len(alt) == 1:
            result = alt[0]
        else:
            result = "(?:" + "|".join(alt) + ")"

        if q:
            if cconly:
                result += "?"
            else:
                result = "(?:%s)?" % result
        return result

    def pattern(self):
        return self._pattern(self.dump())
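
# Minimal usage sketch (illustrative words, not taken from the real mapping tables):
#
#     t = Trie()
#     t.add("cudaMalloc")
#     t.add("cudaMemcpy")
#     pattern = re.compile(t.pattern())              # single regex matching either word
#     pattern.sub("<hit>", "cudaMalloc(&p, n);")     # -> "<hit>(&p, n);"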

CAFFE2_TRIE = Trie()
CAFFE2_MAP = {}
PYTORCH_TRIE = Trie()
PYTORCH_MAP: Dict[str, object] = {}

# In PyTorch, we map cuBLAS->rocBLAS and cuSPARSE->hipSPARSE. Note the prefix, roc versus hip.
# The 'hip' APIs offer a more direct CUDA-friendly mapping, but calling rocBLAS directly has better performance.
# Unfortunately, the roc* types and hip* types differ, i.e., rocblas_float_complex versus hipComplex.
# In the case of SPARSE, we must use the hip types for complex instead of the roc types,
# but the pytorch mappings assume roc. Therefore, we create a new SPARSE mapping that has a higher priority.
# Its mappings will trigger first, and only when a miss occurs will the lower-priority pytorch mapping take place.
# When a file contains "sparse" in the filename, a mapping marked with API_SPARSE is preferred over other choices.
PYTORCH_SPARSE_MAP = {}

for mapping in CUDA_TO_HIP_MAPPINGS:
    assert isinstance(mapping, Mapping)
    for src, value in mapping.items():
        dst = value[0]
        meta_data = value[1:]
        if constants.API_CAFFE2 not in meta_data:
            PYTORCH_TRIE.add(src)
            # if src is already in PYTORCH_MAP and dst belongs to API_SPARSE
            # do not overwrite PYTORCH_MAP, store dst separately
            if constants.API_SPARSE in meta_data and PYTORCH_MAP.get(src, ""):
                PYTORCH_SPARSE_MAP[src] = dst
            else:
                PYTORCH_MAP[src] = dst
        if constants.API_PYTORCH not in meta_data:
            CAFFE2_TRIE.add(src)
            CAFFE2_MAP[src] = dst

RE_CAFFE2_PREPROCESSOR = re.compile(CAFFE2_TRIE.pattern())
RE_PYTORCH_PREPROCESSOR = re.compile(r'(?<=\W)({0})(?=\W)'.format(PYTORCH_TRIE.pattern()))

RE_QUOTE_HEADER = re.compile(r'#include "([^"]+)"')
RE_ANGLE_HEADER = re.compile(r'#include <([^>]+)>')
RE_THC_GENERIC_FILE = re.compile(r'#define THC_GENERIC_FILE "([^"]+)"')
RE_CU_SUFFIX = re.compile(r'\.cu\b')  # be careful not to pick up .cuh
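
# Sketch of how the preprocessor regexes are applied (the exact replacement depends
# on the entries in CUDA_TO_HIP_MAPPINGS; cudaMalloc -> hipMalloc is a typical one):
#
#     RE_PYTORCH_PREPROCESSOR.sub(lambda m: PYTORCH_MAP[m.group(0)],
#                                 " cudaMalloc(&ptr, size); ")
#     # -> " hipMalloc(&ptr, size); "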
- """
- Returns a dict with the following keys:
- "hipified_path" : absolute path of hipified source file
- "status" : "ok" if hipified file was written out
- "skipped" if an identical hipified file already existed or hipified file couldn't be written out
- "ignored" if the source file was a hipified file itself or not meant to be hipified
- """
def preprocessor(
        output_directory: str,
        filepath: str,
        all_files: Iterable,
        header_include_dirs: Iterable,
        stats: Dict[str, List],
        hip_clang_launch: bool,
        is_pytorch_extension: bool,
        clean_ctx: GeneratedFileCleaner,
        show_progress: bool) -> HipifyResult:
    """ Executes the CUDA -> HIP conversion on the specified file. """
    if filepath not in all_files:
        return {"hipified_path": None, "status": "[ignored, not to be hipified]"}

    fin_path = os.path.abspath(os.path.join(output_directory, filepath))
    rel_filepath = os.path.relpath(filepath, output_directory)

    with open(fin_path, 'r', encoding='utf-8') as fin:
        if fin.readline() == HIPIFY_C_BREADCRUMB:
            return {"hipified_path": None, "status": "[ignored, input is hipified output]"}
        fin.seek(0)
        output_source = fin.read()

    orig_output_source = output_source

    # get_hip_file_path needs a relative path to work correctly
    fout_path = os.path.abspath(os.path.join(output_directory, get_hip_file_path(rel_filepath, is_pytorch_extension)))
    if not os.path.exists(os.path.dirname(fout_path)):
        clean_ctx.makedirs(os.path.dirname(fout_path))
    # unsupported_calls statistics reporting is broken atm
    def pt_repl(m):
        return PYTORCH_MAP[m.group(0)]

    def pt_sparse_repl(m):
        # checks SPARSE map first, and if a miss occurs, falls back to pytorch mappings
        return PYTORCH_SPARSE_MAP.get(m.group(0), pt_repl(m))

    if is_pytorch_extension:
        output_source = RE_PYTORCH_PREPROCESSOR.sub(pt_repl, output_source)
    else:
        if is_cusparse_file(rel_filepath):
            output_source = RE_PYTORCH_PREPROCESSOR.sub(pt_sparse_repl, output_source)
        elif is_pytorch_file(rel_filepath):
            output_source = RE_PYTORCH_PREPROCESSOR.sub(pt_repl, output_source)
        else:
            def c2_repl(m):
                return CAFFE2_MAP[m.group(0)]
            output_source = RE_CAFFE2_PREPROCESSOR.sub(c2_repl, output_source)
    # Header rewrites
    def mk_repl(templ, include_current_dir=True):
        def repl(m):
            f = m.group(1)
            dirpath, filename = os.path.split(f)
            if (
                f.startswith("ATen/cuda")
                or f.startswith("ATen/native/cuda")
                or f.startswith("ATen/native/nested/cuda")
                or f.startswith("ATen/native/quantized/cuda")
                or f.startswith("ATen/native/sparse/cuda")
                or f.startswith("ATen/native/transformers/cuda")
                or f.startswith("THC/")
                or (f.startswith("THC") and not f.startswith("THCP"))
            ):
                return templ.format(get_hip_file_path(m.group(1), is_pytorch_extension))
            # if filename is one of the files being hipified for this extension
            if (is_pytorch_extension and any(s.endswith(filename) for s in all_files)):
                header_dir = None
                header_filepath = None
                # If include_current_dir True, look first in same dir as the including source file
                if include_current_dir:
                    header_dir_to_check = os.path.dirname(fin_path)
                    header_path_to_check = os.path.abspath(os.path.join(header_dir_to_check, f))
                    if os.path.exists(header_path_to_check):
                        header_dir = header_dir_to_check
                        header_filepath = header_path_to_check
                # If not found, look in include dirs one by one and first match wins
                if header_filepath is None:
                    for header_include_dir in header_include_dirs:
                        header_dir_to_check = os.path.join(output_directory, header_include_dir)
                        header_path_to_check = os.path.abspath(os.path.join(header_dir_to_check, f))
                        if os.path.exists(header_path_to_check):
                            header_dir = header_dir_to_check
                            header_filepath = header_path_to_check
                # If header file not found, keep as is
                if header_filepath is None:
                    return m.group(0)
                # Hipify header file first if needed
                if header_filepath not in HIPIFY_FINAL_RESULT:
                    preprocess_file_and_save_result(output_directory,
                                                    header_filepath,
                                                    all_files, header_include_dirs, stats, hip_clang_launch,
                                                    is_pytorch_extension, clean_ctx, show_progress)
                hipified_header_filepath = HIPIFY_FINAL_RESULT[header_filepath]["hipified_path"]
                return templ.format(os.path.relpath(hipified_header_filepath if hipified_header_filepath is not None
                                                    else header_filepath, header_dir))

            return m.group(0)
        return repl

    output_source = RE_QUOTE_HEADER.sub(mk_repl('#include "{0}"', True), output_source)
    output_source = RE_ANGLE_HEADER.sub(mk_repl('#include <{0}>', False), output_source)
    output_source = RE_THC_GENERIC_FILE.sub(mk_repl('#define THC_GENERIC_FILE "{0}"'), output_source)
    # CMakeLists.txt rewrites
    if filepath.endswith('CMakeLists.txt'):
        output_source = output_source.replace('CUDA', 'HIP')
        output_source = output_source.replace('THC', 'THH')
        output_source = RE_CU_SUFFIX.sub('.hip', output_source)

    # Perform Kernel Launch Replacements
    if not hip_clang_launch:
        output_source = processKernelLaunches(output_source, stats)

    # Replace std:: with non-std:: versions
    if (filepath.endswith(".cu") or filepath.endswith(".cuh")) and "PowKernel" not in filepath:
        output_source = replace_math_functions(output_source)

    # Include header if device code is contained.
    output_source = hip_header_magic(output_source)

    # Replace the extern __shared__
    # NOTE: No longer needed after transition from hcc to hipclang.
    # output_source = replace_extern_shared(output_source)

    # Don't write out identical hipified files for extensions if dirpath has not changed
    if (
        is_pytorch_extension
        and orig_output_source == output_source
        and os.path.dirname(fin_path) == os.path.dirname(fout_path)
    ):
        return {"hipified_path": fin_path, "status": "[skipped, no changes]"}

    # Add hipify breadcrumb for C-style files to avoid re-hipification
    if fin_path != fout_path and match_extensions(fin_path, (".cu", ".cuh", ".c", ".cc", ".cpp", ".h", ".hpp")):
        output_source = HIPIFY_C_BREADCRUMB + output_source

    do_write = True
    if os.path.exists(fout_path):
        with open(fout_path, 'r', encoding='utf-8') as fout_old:
            do_write = fout_old.read() != output_source
    if do_write:
        try:
            with clean_ctx.open(fout_path, 'w', encoding='utf-8') as fout:
                fout.write(output_source)
            return {"hipified_path": fout_path, "status": "[ok]"}
        except PermissionError as e:
            print(f"{bcolors.WARNING}Failed to save {fout_path} with \"{e.strerror}\", leaving {fin_path} unchanged.{bcolors.ENDC}",
                  file=sys.stderr)
            return {"hipified_path": fin_path, "status": "[skipped, no permissions]"}
    else:
        return {"hipified_path": fout_path, "status": "[skipped, already hipified]"}

def file_specific_replacement(filepath, search_string, replace_string, strict=False):
    with openf(filepath, "r+") as f:
        contents = f.read()
        if strict:
            contents = re.sub(r'\b({0})\b'.format(re.escape(search_string)), lambda x: replace_string, contents)
        else:
            contents = contents.replace(search_string, replace_string)
        f.seek(0)
        f.write(contents)
        f.truncate()


def file_add_header(filepath, header):
    with openf(filepath, "r+") as f:
        contents = f.read()
        if header[0] != "<" and header[-1] != ">":
            header = '"{0}"'.format(header)
        contents = ('#include {0} \n'.format(header)) + contents
        f.seek(0)
        f.write(contents)
        f.truncate()

def fix_static_global_kernels(in_txt):
    """Static global kernels in HIP result in a compilation error."""
    in_txt = in_txt.replace(" __global__ static", "__global__")
    return in_txt


RE_INCLUDE = re.compile(r"#include .*\n")

def extract_arguments(start, string):
    """ Return the list of arguments in the upcoming function parameter closure.
        Example:
        string (input): '(blocks, threads, 0, THCState_getCurrentStream(state))'
        arguments (output):
            '[{'start': 1, 'end': 7},
            {'start': 8, 'end': 16},
            {'start': 17, 'end': 19},
            {'start': 20, 'end': 53}]'
    """
    arguments = []
    closures = {
        "<": 0,
        "(": 0
    }
    current_position = start
    argument_start_pos = current_position + 1

    # Search for final parenthesis
    while current_position < len(string):
        if string[current_position] == "(":
            closures["("] += 1
        elif string[current_position] == ")":
            closures["("] -= 1
        elif string[current_position] == "<":
            closures["<"] += 1
        elif string[current_position] == ">" and string[current_position - 1] != "-" and closures["<"] > 0:
            closures["<"] -= 1

        # Finished all arguments
        if closures["("] == 0 and closures["<"] == 0:
            # Add final argument
            arguments.append({"start": argument_start_pos, "end": current_position})
            break

        # Finished current argument
        if closures["("] == 1 and closures["<"] == 0 and string[current_position] == ",":
            arguments.append({"start": argument_start_pos, "end": current_position})
            argument_start_pos = current_position + 1

        current_position += 1

    return arguments

def str2bool(v):
    """ArgumentParser doesn't support type=bool. Thus, this helper method will convert
    from possible string types to True / False."""
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    else:
        raise argparse.ArgumentTypeError('Boolean value expected.')
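
# Typical use with argparse (illustrative only):
#
#     parser = argparse.ArgumentParser()
#     parser.add_argument("--show-progress", type=str2bool, default=True)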

def hipify(
    project_directory: str,
    show_detailed: bool = False,
    extensions: Iterable = (".cu", ".cuh", ".c", ".cc", ".cpp", ".h", ".in", ".hpp"),
    header_extensions: Iterable = (".cuh", ".h", ".hpp"),
    output_directory: str = "",
    header_include_dirs: Iterable = (),
    includes: Iterable = ('*',),
    extra_files: Iterable = (),
    out_of_place_only: bool = False,
    ignores: Iterable = (),
    show_progress: bool = True,
    hip_clang_launch: bool = False,
    is_pytorch_extension: bool = False,
    hipify_extra_files_only: bool = False,
    clean_ctx: Optional[GeneratedFileCleaner] = None
) -> HipifyFinalResult:
    if project_directory == "":
        project_directory = os.getcwd()

    # Verify the project directory exists.
    if not os.path.exists(project_directory):
        print("The project folder specified does not exist.")
        sys.exit(1)

    # If no output directory, provide a default one.
    if not output_directory:
        project_directory = project_directory.rstrip("/")
        output_directory = project_directory + "_amd"

    if project_directory != output_directory:
        includes = [include.replace(project_directory, output_directory) for include in includes]
        ignores = [ignore.replace(project_directory, output_directory) for ignore in ignores]

    # Copy from project directory to output directory if not done already.
    if not os.path.exists(output_directory):
        shutil.copytree(project_directory, output_directory)
    all_files = list(matched_files_iter(output_directory, includes=includes,
                                        ignores=ignores, extensions=extensions,
                                        out_of_place_only=out_of_place_only,
                                        is_pytorch_extension=is_pytorch_extension))
    all_files_set = set(all_files)
    for f in extra_files:
        if not os.path.isabs(f):
            f = os.path.join(output_directory, f)
        if f not in all_files_set:
            all_files.append(f)

    # List all files in header_include_paths to ensure they are hipified
    from pathlib import Path
    for header_include_dir in header_include_dirs:
        if os.path.isabs(header_include_dir):
            header_include_dir_path = Path(header_include_dir)
        else:
            header_include_dir_path = Path(os.path.join(output_directory, header_include_dir))
        for path in header_include_dir_path.rglob('*'):
            if (
                path.is_file()
                and _fnmatch(str(path), includes)
                and (not _fnmatch(str(path), ignores))
                and match_extensions(path.name, header_extensions)
            ):
                all_files.append(str(path))
    if clean_ctx is None:
        clean_ctx = GeneratedFileCleaner(keep_intermediates=True)

    # Preprocessing statistics.
    stats: Dict[str, List] = {"unsupported_calls": [], "kernel_launches": []}

    for filepath in (all_files if not hipify_extra_files_only else extra_files):
        preprocess_file_and_save_result(output_directory, filepath, all_files, header_include_dirs,
                                        stats, hip_clang_launch, is_pytorch_extension, clean_ctx, show_progress)

    print(bcolors.OKGREEN + "Successfully preprocessed all matching files." + bcolors.ENDC, file=sys.stderr)

    # Show detailed summary
    if show_detailed:
        compute_stats(stats)

    return HIPIFY_FINAL_RESULT
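
# Minimal usage sketch (hypothetical paths; in practice this is usually driven by
# torch.utils.cpp_extension or build_amd.py rather than called directly):
#
#     if __name__ == "__main__":
#         results = hipify(
#             project_directory="/path/to/my_extension",
#             output_directory="/path/to/my_extension",
#             includes=["/path/to/my_extension/*"],
#             is_pytorch_extension=True,
#         )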
|