# sas_constants.py (8.5 KB)
  1. from __future__ import annotations
  2. from typing import Final
  3. magic: Final = (
  4. b"\x00\x00\x00\x00\x00\x00\x00\x00"
  5. + b"\x00\x00\x00\x00\xc2\xea\x81\x60"
  6. + b"\xb3\x14\x11\xcf\xbd\x92\x08\x00"
  7. + b"\x09\xc7\x31\x8c\x18\x1f\x10\x11"
  8. )
  9. align_1_checker_value: Final = b"3"
  10. align_1_offset: Final = 32
  11. align_1_length: Final = 1
  12. align_1_value: Final = 4
  13. u64_byte_checker_value: Final = b"3"
  14. align_2_offset: Final = 35
  15. align_2_length: Final = 1
  16. align_2_value: Final = 4
  17. endianness_offset: Final = 37
  18. endianness_length: Final = 1
  19. platform_offset: Final = 39
  20. platform_length: Final = 1
  21. encoding_offset: Final = 70
  22. encoding_length: Final = 1
  23. dataset_offset: Final = 92
  24. dataset_length: Final = 64
  25. file_type_offset: Final = 156
  26. file_type_length: Final = 8
  27. date_created_offset: Final = 164
  28. date_created_length: Final = 8
  29. date_modified_offset: Final = 172
  30. date_modified_length: Final = 8
  31. header_size_offset: Final = 196
  32. header_size_length: Final = 4
  33. page_size_offset: Final = 200
  34. page_size_length: Final = 4
  35. page_count_offset: Final = 204
  36. page_count_length: Final = 4
  37. sas_release_offset: Final = 216
  38. sas_release_length: Final = 8
  39. sas_server_type_offset: Final = 224
  40. sas_server_type_length: Final = 16
  41. os_version_number_offset: Final = 240
  42. os_version_number_length: Final = 16
  43. os_maker_offset: Final = 256
  44. os_maker_length: Final = 16
  45. os_name_offset: Final = 272
  46. os_name_length: Final = 16
  47. page_bit_offset_x86: Final = 16
  48. page_bit_offset_x64: Final = 32
  49. subheader_pointer_length_x86: Final = 12
  50. subheader_pointer_length_x64: Final = 24
  51. page_type_offset: Final = 0
  52. page_type_length: Final = 2
  53. block_count_offset: Final = 2
  54. block_count_length: Final = 2
  55. subheader_count_offset: Final = 4
  56. subheader_count_length: Final = 2
  57. page_type_mask: Final = 0x0F00
  58. # Keep "page_comp_type" bits
  59. page_type_mask2: Final = 0xF000 | page_type_mask
  60. page_meta_type: Final = 0x0000
  61. page_data_type: Final = 0x0100
  62. page_mix_type: Final = 0x0200
  63. page_amd_type: Final = 0x0400
  64. page_meta2_type: Final = 0x4000
  65. page_comp_type: Final = 0x9000
  66. page_meta_types: Final = [page_meta_type, page_meta2_type]
  67. subheader_pointers_offset: Final = 8
  68. truncated_subheader_id: Final = 1
  69. compressed_subheader_id: Final = 4
  70. compressed_subheader_type: Final = 1
  71. text_block_size_length: Final = 2
  72. row_length_offset_multiplier: Final = 5
  73. row_count_offset_multiplier: Final = 6
  74. col_count_p1_multiplier: Final = 9
  75. col_count_p2_multiplier: Final = 10
  76. row_count_on_mix_page_offset_multiplier: Final = 15
  77. column_name_pointer_length: Final = 8
  78. column_name_text_subheader_offset: Final = 0
  79. column_name_text_subheader_length: Final = 2
  80. column_name_offset_offset: Final = 2
  81. column_name_offset_length: Final = 2
  82. column_name_length_offset: Final = 4
  83. column_name_length_length: Final = 2
  84. column_data_offset_offset: Final = 8
  85. column_data_length_offset: Final = 8
  86. column_data_length_length: Final = 4
  87. column_type_offset: Final = 14
  88. column_type_length: Final = 1
  89. column_format_text_subheader_index_offset: Final = 22
  90. column_format_text_subheader_index_length: Final = 2
  91. column_format_offset_offset: Final = 24
  92. column_format_offset_length: Final = 2
  93. column_format_length_offset: Final = 26
  94. column_format_length_length: Final = 2
  95. column_label_text_subheader_index_offset: Final = 28
  96. column_label_text_subheader_index_length: Final = 2
  97. column_label_offset_offset: Final = 30
  98. column_label_offset_length: Final = 2
  99. column_label_length_offset: Final = 32
  100. column_label_length_length: Final = 2
  101. rle_compression: Final = b"SASYZCRL"
  102. rdc_compression: Final = b"SASYZCR2"
  103. compression_literals: Final = [rle_compression, rdc_compression]
  104. # Incomplete list of encodings, using SAS nomenclature:
  105. # https://support.sas.com/documentation/onlinedoc/dfdmstudio/2.6/dmpdmsug/Content/dfU_Encodings_SAS.html
  106. # corresponding to the Python documentation of standard encodings
  107. # https://docs.python.org/3/library/codecs.html#standard-encodings
  108. encoding_names: Final = {
  109. 20: "utf-8",
  110. 29: "latin1",
  111. 30: "latin2",
  112. 31: "latin3",
  113. 32: "latin4",
  114. 33: "cyrillic",
  115. 34: "arabic",
  116. 35: "greek",
  117. 36: "hebrew",
  118. 37: "latin5",
  119. 38: "latin6",
  120. 39: "cp874",
  121. 40: "latin9",
  122. 41: "cp437",
  123. 42: "cp850",
  124. 43: "cp852",
  125. 44: "cp857",
  126. 45: "cp858",
  127. 46: "cp862",
  128. 47: "cp864",
  129. 48: "cp865",
  130. 49: "cp866",
  131. 50: "cp869",
  132. 51: "cp874",
  133. # 52: "", # not found
  134. # 53: "", # not found
  135. # 54: "", # not found
  136. 55: "cp720",
  137. 56: "cp737",
  138. 57: "cp775",
  139. 58: "cp860",
  140. 59: "cp863",
  141. 60: "cp1250",
  142. 61: "cp1251",
  143. 62: "cp1252",
  144. 63: "cp1253",
  145. 64: "cp1254",
  146. 65: "cp1255",
  147. 66: "cp1256",
  148. 67: "cp1257",
  149. 68: "cp1258",
  150. 118: "cp950",
  151. # 119: "", # not found
  152. 123: "big5",
  153. 125: "gb2312",
  154. 126: "cp936",
  155. 134: "euc_jp",
  156. 136: "cp932",
  157. 138: "shift_jis",
  158. 140: "euc-kr",
  159. 141: "cp949",
  160. 227: "latin8",
  161. # 228: "", # not found
  162. # 229: "" # not found
  163. }
  164. class SASIndex:
  165. row_size_index: Final = 0
  166. column_size_index: Final = 1
  167. subheader_counts_index: Final = 2
  168. column_text_index: Final = 3
  169. column_name_index: Final = 4
  170. column_attributes_index: Final = 5
  171. format_and_label_index: Final = 6
  172. column_list_index: Final = 7
  173. data_subheader_index: Final = 8
  174. subheader_signature_to_index: Final = {
  175. b"\xF7\xF7\xF7\xF7": SASIndex.row_size_index,
  176. b"\x00\x00\x00\x00\xF7\xF7\xF7\xF7": SASIndex.row_size_index,
  177. b"\xF7\xF7\xF7\xF7\x00\x00\x00\x00": SASIndex.row_size_index,
  178. b"\xF7\xF7\xF7\xF7\xFF\xFF\xFB\xFE": SASIndex.row_size_index,
  179. b"\xF6\xF6\xF6\xF6": SASIndex.column_size_index,
  180. b"\x00\x00\x00\x00\xF6\xF6\xF6\xF6": SASIndex.column_size_index,
  181. b"\xF6\xF6\xF6\xF6\x00\x00\x00\x00": SASIndex.column_size_index,
  182. b"\xF6\xF6\xF6\xF6\xFF\xFF\xFB\xFE": SASIndex.column_size_index,
  183. b"\x00\xFC\xFF\xFF": SASIndex.subheader_counts_index,
  184. b"\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index,
  185. b"\x00\xFC\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.subheader_counts_index,
  186. b"\xFF\xFF\xFF\xFF\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index,
  187. b"\xFD\xFF\xFF\xFF": SASIndex.column_text_index,
  188. b"\xFF\xFF\xFF\xFD": SASIndex.column_text_index,
  189. b"\xFD\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_text_index,
  190. b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD": SASIndex.column_text_index,
  191. b"\xFF\xFF\xFF\xFF": SASIndex.column_name_index,
  192. b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_name_index,
  193. b"\xFC\xFF\xFF\xFF": SASIndex.column_attributes_index,
  194. b"\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index,
  195. b"\xFC\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_attributes_index,
  196. b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index,
  197. b"\xFE\xFB\xFF\xFF": SASIndex.format_and_label_index,
  198. b"\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index,
  199. b"\xFE\xFB\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.format_and_label_index,
  200. b"\xFF\xFF\xFF\xFF\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index,
  201. b"\xFE\xFF\xFF\xFF": SASIndex.column_list_index,
  202. b"\xFF\xFF\xFF\xFE": SASIndex.column_list_index,
  203. b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_list_index,
  204. b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE": SASIndex.column_list_index,
  205. }
  206. # List of frequently used SAS date and datetime formats
  207. # http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm
  208. # https://github.com/epam/parso/blob/master/src/main/java/com/epam/parso/impl/SasFileConstants.java
  209. sas_date_formats: Final = (
  210. "DATE",
  211. "DAY",
  212. "DDMMYY",
  213. "DOWNAME",
  214. "JULDAY",
  215. "JULIAN",
  216. "MMDDYY",
  217. "MMYY",
  218. "MMYYC",
  219. "MMYYD",
  220. "MMYYP",
  221. "MMYYS",
  222. "MMYYN",
  223. "MONNAME",
  224. "MONTH",
  225. "MONYY",
  226. "QTR",
  227. "QTRR",
  228. "NENGO",
  229. "WEEKDATE",
  230. "WEEKDATX",
  231. "WEEKDAY",
  232. "WEEKV",
  233. "WORDDATE",
  234. "WORDDATX",
  235. "YEAR",
  236. "YYMM",
  237. "YYMMC",
  238. "YYMMD",
  239. "YYMMP",
  240. "YYMMS",
  241. "YYMMN",
  242. "YYMON",
  243. "YYMMDD",
  244. "YYQ",
  245. "YYQC",
  246. "YYQD",
  247. "YYQP",
  248. "YYQS",
  249. "YYQN",
  250. "YYQR",
  251. "YYQRC",
  252. "YYQRD",
  253. "YYQRP",
  254. "YYQRS",
  255. "YYQRN",
  256. "YYMMDDP",
  257. "YYMMDDC",
  258. "E8601DA",
  259. "YYMMDDN",
  260. "MMDDYYC",
  261. "MMDDYYS",
  262. "MMDDYYD",
  263. "YYMMDDS",
  264. "B8601DA",
  265. "DDMMYYN",
  266. "YYMMDDD",
  267. "DDMMYYB",
  268. "DDMMYYP",
  269. "MMDDYYP",
  270. "YYMMDDB",
  271. "MMDDYYN",
  272. "DDMMYYC",
  273. "DDMMYYD",
  274. "DDMMYYS",
  275. "MINGUO",
  276. )
  277. sas_datetime_formats: Final = (
  278. "DATETIME",
  279. "DTWKDATX",
  280. "B8601DN",
  281. "B8601DT",
  282. "B8601DX",
  283. "B8601DZ",
  284. "B8601LX",
  285. "E8601DN",
  286. "E8601DT",
  287. "E8601DX",
  288. "E8601DZ",
  289. "E8601LX",
  290. "DATEAMPM",
  291. "DTDATE",
  292. "DTMONYY",
  293. "DTMONYY",
  294. "DTWKDATX",
  295. "DTYEAR",
  296. "TOD",
  297. "MDYAMPM",
  298. )