xfile.py 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. # update: 2024-3-31-10
  2. """
  3. mode:
  4. # r 只读,默认打开方式,当文件不存在时会报错
  5. # w 只写,当文件不存在时会自动创建文件,文件内容只能是字符串,只能写入字符串
  6. # r+ 可读可写,当文件不存在时会报错
  7. # w+ 可读可写。当文件不存在时会新建
  8. # a 追加文件,不可读
  9. # a+ 追加文件,可读可写
  10. # rb 以二进制读模式打开,只可读
  11. # rb+ 以二进制读写模式打开,可读可写,当文件不存在时报错
  12. # wb 以二进制写模式打开,只可写
  13. # wb+ 以二进制读写模式打开,可读可写。当文件不存在时新建
  14. # ab 以二进制追加模式打开,追加文件,不可读
  15. # ab+ 以二进制读写模式打开,追加文件。可读可写
  16. """
  17. import hashlib
  18. import base64
  19. import pickle
  20. import sys
  21. import os
  22. def read_bytes(file_path):
  23. with open(file_path, 'rb') as f:
  24. return f.read()
  25. def read_text(file_path):
  26. with open(file_path, 'r') as f:
  27. return f.read()
  28. def write_bytes(path, data=b''):
  29. with open(path, 'wb') as f:
  30. f.write(data)
  31. def write_text(path, data='', mode='w'):
  32. with open(path, mode) as f:
  33. f.write(data)
  34. def write_file(path, data=None, mode='wb'):
  35. with open(path, mode) as f:
  36. f.write(data)
  37. def load_pickle_file(file_path):
  38. with open(file_path, 'rb+') as f:
  39. return pickle.load(f)
  40. def save_pickle_file(file_path, data):
  41. with open(file_path, 'wb') as f:
  42. f.write(pickle.dumps(data))
  43. def get_file_md5(file_path):
  44. """获取文件md5"""
  45. m = hashlib.md5()
  46. with open(file_path, 'rb') as f:
  47. m.update(f.read())
  48. return m.hexdigest()
  49. def get_big_file_md5(file_path, block_size=8 * 1024):
  50. """获取文件md5(默认使用8KB作为分块大小)"""
  51. m = hashlib.md5()
  52. with open(file_path, 'rb') as f:
  53. while True:
  54. block = f.read(block_size)
  55. if not block:
  56. break
  57. m.update(block)
  58. return m.hexdigest()
  59. def _file_iterator(file_object, block_size=8 * 1024):
  60. with file_object:
  61. block = file_object.read(block_size)
  62. while len(block) > 0:
  63. yield block
  64. block = file_object.read(block_size)
  65. def get_big_file_md5_v2(file_path):
  66. """获取文件md5(默认使用8KB作为分块大小)"""
  67. file_object = open(file_path, 'rb')
  68. blocks = _file_iterator(file_object)
  69. m = hashlib.md5()
  70. for block in blocks:
  71. m.update(block)
  72. return m.hexdigest()
  73. def get_file_b64_md5(file_path):
  74. """获取文件的b64的md5"""
  75. m = hashlib.md5()
  76. with open(file_path, 'rb') as f:
  77. block = f.read()
  78. encoded = base64.b64encode(block)
  79. m.update(encoded)
  80. return m.hexdigest()
  81. def get_big_file_b64_md5(file_path, block_size=3 * 1024 * 1024):
  82. """流式获取文件的b64的md5(默认使用3MB作为分块大小)"""
  83. with open(file_path, 'rb') as f1, open(f'{file_path}.b64', 'wb') as f2:
  84. while True:
  85. block = f1.read(block_size)
  86. if not block:
  87. break
  88. b64_data = base64.b64encode(block)
  89. f2.write(b64_data)
  90. return get_big_file_md5(f'{file_path}.b64')
  91. def get_var_size(object, unit='MB'):
  92. """
  93. unit: GB/MB/KB/B
  94. """
  95. unit_dict = {
  96. 'GB': 3,
  97. 'MB': 2,
  98. 'KB': 1,
  99. 'B': 0,
  100. }
  101. byte = sys.getsizeof(object)
  102. size = byte / (1024 ** unit_dict.get(unit))
  103. return float(round(size, 1))
  104. def get_file_size(file_path, unit='MB'):
  105. """
  106. unit: GB/MB/KB/B
  107. """
  108. if not is_file(file_path):
  109. return None
  110. unit_dict = {
  111. 'GB': 3,
  112. 'MB': 2,
  113. 'KB': 1,
  114. 'B': 0,
  115. }
  116. byte = os.path.getsize(file_path)
  117. size = byte / (1024 ** unit_dict.get(unit))
  118. return float(round(size, 1))
  119. def get_file_path_list(dir_path='/root', path_list=None):
  120. """
  121. 获取指定目录下全部文件
  122. Example:
  123. get_file_path_list('/opt')
  124. ['/opt/1.txt', '/opt/2.txt']
  125. # import glob
  126. # if os.path.isfile(input_dir):
  127. # img_list = [input_dir]
  128. # else:
  129. # img_list = sorted(glob.glob(os.path.join(input_dir, '*')))
  130. """
  131. if not path_list:
  132. path_list = []
  133. for path, folders, files in os.walk(dir_path):
  134. for file_name in files:
  135. file_path = os.path.join(path, file_name)
  136. if file_path in path_list:
  137. continue
  138. # if file_path.split('.')[-1] == 'pdb':
  139. # remove_file(file_path)
  140. # print(f"#file_path: {file_path}")
  141. path_list.append(file_path)
  142. for folder_name in folders:
  143. get_file_path_list(os.path.join(path, folder_name), path_list)
  144. return path_list
  145. def mkdir(dir_path):
  146. """
  147. 创建文件目录
  148. Example:
  149. mkdir('/share/abc/xyz/1.txt')
  150. True
  151. """
  152. if not dir_path:
  153. return False
  154. if dir_path[0] != '/':
  155. dir_path = f"{get_pwd()}/{dir_path}"
  156. path = '/'
  157. for part in dir_path.split('/'):
  158. path += f"{part}/"
  159. if not os.path.isdir(path):
  160. os.mkdir(path)
  161. return True
  162. def get_pwd():
  163. """
  164. 获取当前路径
  165. """
  166. path = sys.path[0]
  167. if not os.path.isdir(path):
  168. path = os.path.dirname(path)
  169. return path
  170. def is_file(file_path):
  171. """判断文件"""
  172. return os.path.isfile(file_path)
  173. def is_dir(file_path):
  174. """判断目录"""
  175. return os.path.isdir(file_path)
  176. def remove_file(file_path):
  177. """删除文件"""
  178. return os.remove(file_path)
  179. def move_file(s_path, d_path):
  180. """移动文件"""
  181. return os.rename(s_path, d_path)
  182. if __name__ == '__main__':
  183. """
  184. """
  185. # --- test ---
  186. # file_dir = r'E:\casper\repositories\repositories\SRI-DYZBC.Cockpit-cpp'
  187. # file_dir = r'E:\casper\repositories\repositories\SRI-DYZBC.Cockpit-cpp\EgoQt'
  188. # file_dir = r'E:\casper\repositories\repositories\SRI-DYZBC.Cockpit-cpp\EgoQt\thirdparty'
  189. # file_dir = r'E:\casper\repositories\repositories\SRI-DYZBC.Cockpit-cpp\EgoServer\webrtcinterop\x64\Release'
  190. # file_dir = r'E:\casper\repositories\repositories\SRI-DYZBC.Cockpit-cpp\EgoQt\webrtcinterop\x64\Release'
  191. # file_dir = r'E:\casper\repositories\repositories\SRI-DYZBC.Cockpit-cpp\EgoServer'
  192. file_dir = r'E:\casper\repositories\repositories\SRI-DYZBC.Cockpit-cpp\EgoQt'
  193. # file_dir = r'E:\casper\repositories\repositories\SRI-DYZBC.Cockpit-cpp\EgoServer'
  194. path_list = get_file_path_list(file_dir)
  195. print(path_list)
  196. # remove_file(r'E:\casper\repositories\repositories\SRI-DYZBC.Cockpit-cpp\EgoQt\webrtcinterop\x64\Release\webrtcinterop.obj')