| | | 1 | | using System; |
| | | 2 | | using System.IO; |
| | | 3 | | using System.Text; |
| | | 4 | | |
| | | 5 | | namespace DirectSight.Common; |
| | | 6 | | |
/// <summary>
/// Helper class to detect the text encoding of files.
/// </summary>
public class FileHelper
{
    /// <summary>Length in bytes of the longest byte order mark that is recognized (UTF-32).</summary>
    private const int MaxBomLength = 4;

    /// <summary>UTF-32 big-endian; <see cref="Encoding.UTF32"/> is the little-endian variant.</summary>
    private static readonly Encoding Utf32BigEndian = new UTF32Encoding(bigEndian: true, byteOrderMark: true);

    /// <summary>
    /// Determines a text file's encoding by analyzing its byte order mark (BOM) and, if no BOM is found,
    /// by trying to decode the whole file as UTF-8 and then as ISO-8859-1 (Latin-1).
    /// Defaults to UTF-8 when none of the detection steps yields a result.
    /// </summary>
    /// <param name="filename">The text file to analyze.</param>
    /// <returns>
    /// The detected encoding. When the encoding was detected by parsing (no BOM), the returned instance
    /// uses exception fallbacks, i.e. it throws on invalid input instead of substituting characters.
    /// </returns>
    /// <remarks>
    /// Errors raised while opening or reading the file (for example a missing file) are not swallowed;
    /// they propagate to the caller.
    /// </remarks>
    public static Encoding GetEncoding(string filename)
    {
        // Each step runs only when the previous one returned null:
        // 1. BOM, 2. strict UTF-8 decoding, 3. strict Latin-1 decoding, 4. UTF-8 as last resort.
        return GetEncodingByBOM(filename)
            ?? GetEncodingByParsing(filename, Encoding.UTF8)
            ?? GetEncodingByParsing(filename, Encoding.GetEncoding("iso-8859-1"))
            ?? Encoding.UTF8;
    }

    /// <summary>
    /// Determines a text file's encoding by analyzing its byte order mark (BOM).
    /// </summary>
    /// <param name="filename">The text file to analyze.</param>
    /// <returns>The encoding announced by the BOM, or <c>null</c> when the file has no known BOM.</returns>
    private static Encoding GetEncodingByBOM(string filename)
    {
        // Read up to MaxBomLength bytes. Stream.Read may return fewer bytes than requested,
        // so keep reading until the buffer is full or the end of the file is reached.
        var header = new byte[MaxBomLength];
        var headerLength = 0;
        using (var file = new FileStream(filename, FileMode.Open, FileAccess.Read))
        {
            int read;
            while (headerLength < header.Length
                   && (read = file.Read(header, headerLength, header.Length - headerLength)) > 0)
            {
                headerLength += read;
            }
        }

        // The UTF-32LE BOM (FF FE 00 00) begins with the UTF-16LE BOM (FF FE),
        // so the 4-byte marks must be tested before the 2-byte marks.
        if (StartsWith(header, headerLength, 0xFF, 0xFE, 0x00, 0x00))
        {
            // UTF-32LE
            return Encoding.UTF32;
        }

        if (StartsWith(header, headerLength, 0x00, 0x00, 0xFE, 0xFF))
        {
            // UTF-32BE
            return Utf32BigEndian;
        }

        if (StartsWith(header, headerLength, 0xEF, 0xBB, 0xBF))
        {
            return Encoding.UTF8;
        }

        if (StartsWith(header, headerLength, 0xFF, 0xFE))
        {
            // UTF-16LE
            return Encoding.Unicode;
        }

        if (StartsWith(header, headerLength, 0xFE, 0xFF))
        {
            // UTF-16BE
            return Encoding.BigEndianUnicode;
        }

        // no BOM found
        return null;
    }

    /// <summary>
    /// Checks whether a file can be decoded completely with the given encoding.
    /// </summary>
    /// <param name="filename">The text file to analyze.</param>
    /// <param name="encoding">The candidate encoding to verify.</param>
    /// <returns>
    /// The encoding the reader ended up using when the whole file decoded without errors;
    /// <c>null</c> when the file contains byte sequences that are invalid for the candidate encoding.
    /// </returns>
    private static Encoding GetEncodingByParsing(string filename, Encoding encoding)
    {
        // A strict copy of the candidate: invalid bytes raise an exception instead of being replaced.
        var strictEncoding = Encoding.GetEncoding(
            encoding.BodyName,
            new EncoderExceptionFallback(),
            new DecoderExceptionFallback());

        try
        {
            using var reader = new StreamReader(filename, strictEncoding, detectEncodingFromByteOrderMarks: true);

            // Decode the whole file; the characters themselves are not needed, only the absence of errors.
            var buffer = new char[4096];
            while (reader.Read(buffer, 0, buffer.Length) > 0)
            {
            }

            // all text parsed ok
            return reader.CurrentEncoding;
        }
        catch (DecoderFallbackException)
        {
            // Only a decoding failure means "not this encoding"; I/O errors are left to propagate.
            return null;
        }
    }

    /// <summary>
    /// Tells whether the first <paramref name="length"/> valid bytes of <paramref name="buffer"/>
    /// begin with the given byte sequence.
    /// </summary>
    private static bool StartsWith(byte[] buffer, int length, params byte[] prefix)
    {
        if (length < prefix.Length)
        {
            return false;
        }

        for (var i = 0; i < prefix.Length; i++)
        {
            if (buffer[i] != prefix[i])
            {
                return false;
            }
        }

        return true;
    }
}