1 /**
2  * Parsing mime/magic files.
3  * Authors:
4  *  $(LINK2 https://github.com/FreeSlave, Roman Chistokhodov)
5  * License:
6  *  $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
7  * Copyright:
8  *  Roman Chistokhodov, 2015-2016
9  */
10 
11 module mime.files.magic;
12 public import mime.magic;
13 import mime.common;
14 
15 private {
16     import std.algorithm;
17     import std.bitmanip;
18     import std.conv;
19     import std.exception;
20     import std.range;
21     import std..string;
22     import std.traits;
23     import std.typecons;
24     import mime.files.common;
25 }
26 
/**
 * Exception thrown on parse errors while reading shared MIME database magic file.
 */
final class MimeMagicFileException : Exception
{
    /**
     * Creates the exception; all arguments are forwarded unchanged to the $(D Exception) constructor.
     * Params:
     *  msg  = Error description.
     *  file = Source file name associated with the error (defaults to the call site).
     *  line = Source line associated with the error (defaults to the call site).
     *  next = Next throwable in the chain, if any.
     */
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
        @safe pure nothrow
    {
        super(msg, file, line, next);
    }
}
34 
/**
 * MIME type name and corresponding magic.
 *
 * Fields: $(D mimeType) is the MIME type name, $(D magic) is the magic parsed for it
 * and $(D deleteMagic) is a flag accompanying the entry.
 */
alias MagicEntry = Tuple!(string, "mimeType", MimeMagic, "magic", bool, "deleteMagic");
37 
/*
 * Parses one match rule from the front of current, followed by every
 * deeper-indented rule that comes right after it (those become submatches).
 *
 * The rule is read as: '>' start-offset '=' value ['&' mask] ['~' word-size] ['+' range-length] ... '\n'
 * where value is either the literal "__NOMAGIC__" or a 2-byte big-endian length followed
 * by that many bytes. Whatever remains on the line before '\n' is skipped.
 *
 * Params:
 *  current  = Remaining input; advanced past everything that was consumed.
 *  myIndent = Indent of the rule being parsed. The caller has already consumed the indent itself.
 * Returns: The parsed match including its submatches.
 * Throws: Exception if the input does not have the expected shape.
 */
private @trusted MagicMatch parseMagicMatch(ref const(char)[] current, uint myIndent)
{
    // Drops the first character of the input when it equals expected and reports whether it did so.
    bool skipChar(char expected) {
        if (current.length == 0 || current[0] != expected) {
            return false;
        }
        current = current[1..$];
        return true;
    }

    enforce(skipChar('>'), "Expected '>' at the start of match rule");
    uint startOffset = parse!uint(current);
    enforce(skipChar('='), "Expected '=' after start-offset");

    enum noMagic = "__NOMAGIC__";
    const(ubyte)[] value;
    immutable bool isNoMagicRule = current.length >= noMagic.length && current[0..noMagic.length] == noMagic;
    if (isNoMagicRule) {
        // The marker itself is stored as the value.
        value = cast(const(ubyte)[])noMagic;
    } else {
        enforce(current.length >= 2, "Expected 2 bytes to read value length");
        ubyte[2] lengthBytes = [cast(ubyte)current[0], cast(ubyte)current[1]];
        current = current[2..$];

        immutable valueLength = bigEndianToNative!ushort(lengthBytes);
        enforce(current.length >= valueLength, "Value is out of bounds");
        value = cast(const(ubyte)[])(current[0..valueLength]);
    }
    // In both cases the value bytes are still at the front of the input here.
    current = current[value.length..$];

    // Optional mask: always as long as the value.
    const(ubyte)[] mask;
    if (skipChar('&')) {
        enforce(current.length >= value.length, "Mask is out of bounds");
        mask = cast(const(ubyte)[])(current[0..value.length]);
        current = current[value.length..$];
    }

    // Optional word size and range length, both defaulting to 1.
    uint wordSize = 1;
    if (skipChar('~')) {
        wordSize = parse!uint(current);
    }

    uint rangeLength = 1;
    if (skipChar('+')) {
        rangeLength = parse!uint(current);
    }

    // Locate the end of the line; current.length serves as the "not found" marker.
    size_t lineEnd = current.length;
    foreach (i, c; current) {
        if (c == '\n') {
            lineEnd = i;
            break;
        }
    }
    enforce(lineEnd < current.length, "Expected new line character after match rule definition");
    current = current[lineEnd + 1..$];

    // The file does not store the match type; it is derived from word size and value length.
    immutable bool looksLikeHost16 = wordSize == 2 && value.length == 2;
    immutable bool looksLikeHost32 = wordSize == 4 && value.length == 4;
    auto type = looksLikeHost16 ? MagicMatch.Type.host16
              : looksLikeHost32 ? MagicMatch.Type.host32
              : MagicMatch.Type.string_;

    auto match = MagicMatch(type, value.idup, mask.idup, startOffset, rangeLength);

    // Collect sub rules: following lines whose indent is greater than ours.
    for (;;) {
        if (current.length == 0 || current[0] == '[') {
            break;
        }
        // Read the indent on a copy, so a rule that is not ours stays untouched for the caller.
        auto lookahead = current;
        uint childIndent = parseIndent(lookahead);
        if (childIndent <= myIndent) {
            break;
        }
        current = lookahead;
        MagicMatch submatch = parseMagicMatch(current, childIndent);
        match.addSubmatch(submatch);
    }

    return match;
}
123 
/**
 * Reads magic file contents and pushes magic entries to sink.
 * Params:
 *  data = Contents of the magic file. Must start with the "MIME-Magic\0\n" header.
 *  sink = Output range accepting $(D MagicEntry) elements (a delegate or function taking
 *         $(D MagicEntry) works as well). One entry is put per "[priority:mimetype]" section.
 * Throws:
 *  $(D MimeMagicFileException) on error.
 */
void magicFileReader(OutRange)(const(void)[] data, OutRange sink) if (isOutputRange!(OutRange, MagicEntry))
{
    // Entries are delivered with put() so that every type accepted by the
    // isOutputRange constraint works, not only callables. For delegates and
    // functions put() simply invokes them.
    import std.range : put;

    try {
        enum mimeMagic = "MIME-Magic\0\n";
        auto content = cast(const(char)[])data;
        if (!content.startsWith(mimeMagic)) {
            throw new Exception("Not mime magic file");
        }

        auto current = content[mimeMagic.length..$];

        while(current.length) {
            // Section header: "[priority:mimetype]\n"
            enforce(current[0] == '[', "Expected '[' at the start of magic section");
            current = current[1..$];

            auto result = findSplit(current[0..$], "]\n");
            enforce(result[1].length, "Could not find \"]\\n\"");
            current = result[2];

            auto sectionResult = findSplit(result[0], ":");
            enforce(sectionResult[1].length, "Priority and MIME type must be splitted by ':'");

            uint priority = parse!uint(sectionResult[0]);
            auto mimeType = sectionResult[2];

            auto magic = MimeMagic(priority);

            // Match rules of this section run until the next '[' or the end of input.
            bool shouldDeleteMagic = false;
            while (current.length && current[0] != '[') {
                uint indent = parseIndent(current);

                MagicMatch match = parseMagicMatch(current, indent);
                if (isNoMagic(match.value)) {
                    // Such a rule is not stored as a match; it only raises the flag on the entry.
                    shouldDeleteMagic = true;
                } else {
                    magic.addMatch(match);
                }
            }
            put(sink, MagicEntry(mimeType.idup, magic, shouldDeleteMagic));
        }
    } catch (Exception e) {
        // Every failure is reported as MimeMagicFileException, keeping the original message and location.
        throw new MimeMagicFileException(e.msg, e.file, e.line, e.next);
    }
}
173 
///
unittest
{
    // One section holding a __NOMAGIC__ rule, a top-level rule with a mask
    // and a rule nested one indent level below it.
    auto data =
        "MIME-Magic\0\n[60:text/x-diff]\n" ~
        ">0=__NOMAGIC__\n" ~
        "0>4=\x00\x02\x55\x40&\xff\xf0~2+8\n" ~
            "1>12=\x00\x04\x55\x40\xff\xf0~4+10\n";

    // Counts sink invocations, so the test cannot pass without the checks below ever running.
    size_t sinkCalls;

    void sink(MagicEntry t) {
        ++sinkCalls;

        assert(t.mimeType == "text/x-diff");
        assert(t.magic.weight == 60);
        assert(t.magic.matches.length == 1);
        assert(t.deleteMagic);

        auto match = t.magic.matches[0];
        assert(match.startOffset == 4);
        assert(match.value.length == 2);
        assert(match.mask.length == 2);
        assert(match.type == MagicMatch.Type.host16);
        assert(match.rangeLength == 8);
        assert(match.submatches.length == 1);

        auto submatch = match.submatches[0];
        assert(submatch.startOffset == 12);
        assert(submatch.value.length == 4);
        assert(!submatch.hasMask());
        assert(submatch.type == MagicMatch.Type.host32);
        assert(submatch.rangeLength == 10);
    }
    magicFileReader(data, &sink);
    assert(sinkCalls == 1);

    void emptySink(MagicEntry t) {

    }
    // Data without the "MIME-Magic\0\n" header must be rejected.
    assertThrown!MimeMagicFileException(magicFileReader("MIME-wrong-magic", &emptySink));

}