csr2022.h 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 2005-2015, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. */
  9. #ifndef __CSR2022_H
  10. #define __CSR2022_H
  11. #include "unicode/utypes.h"
  12. #if !UCONFIG_NO_CONVERSION
  13. #include "csrecog.h"
  14. U_NAMESPACE_BEGIN
  15. class CharsetMatch;
  16. /**
  17. * class CharsetRecog_2022 is part of the ICU charset detection implementation.
  18. * This is a superclass for the individual detectors for
  19. * each of the detectable members of the ISO 2022 family
  20. * of encodings.
  21. *
  22. * The separate classes are nested within this class.
  23. *
  24. * @internal
  25. */
  26. class CharsetRecog_2022 : public CharsetRecognizer
  27. {
  28. public:
  29. virtual ~CharsetRecog_2022() = 0;
  30. protected:
  31. /**
  32. * Matching function shared among the 2022 detectors JP, CN and KR
  33. * Counts up the number of legal an unrecognized escape sequences in
  34. * the sample of text, and computes a score based on the total number &
  35. * the proportion that fit the encoding.
  36. *
  37. *
  38. * @param text the byte buffer containing text to analyse
  39. * @param textLen the size of the text in the byte.
  40. * @param escapeSequences the byte escape sequences to test for.
  41. * @return match quality, in the range of 0-100.
  42. */
  43. int32_t match_2022(const uint8_t *text,
  44. int32_t textLen,
  45. const uint8_t escapeSequences[][5],
  46. int32_t escapeSequences_length) const;
  47. };
  48. class CharsetRecog_2022JP :public CharsetRecog_2022
  49. {
  50. public:
  51. virtual ~CharsetRecog_2022JP();
  52. const char *getName() const;
  53. UBool match(InputText *textIn, CharsetMatch *results) const;
  54. };
  55. #if !UCONFIG_ONLY_HTML_CONVERSION
  56. class CharsetRecog_2022KR :public CharsetRecog_2022 {
  57. public:
  58. virtual ~CharsetRecog_2022KR();
  59. const char *getName() const;
  60. UBool match(InputText *textIn, CharsetMatch *results) const;
  61. };
  62. class CharsetRecog_2022CN :public CharsetRecog_2022
  63. {
  64. public:
  65. virtual ~CharsetRecog_2022CN();
  66. const char* getName() const;
  67. UBool match(InputText *textIn, CharsetMatch *results) const;
  68. };
  69. #endif
  70. U_NAMESPACE_END
  71. #endif
  72. #endif /* __CSR2022_H */