1 /*
2  * Copyright (C) 2020, 2021  Vladimir Panteleev <btdu@cy.md>
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 
19 /// Path manipulation and storage
20 module btdu.paths;
21 
22 import ae.utils.appender;
23 import ae.utils.meta;
24 
25 import std.algorithm.comparison;
26 import std.algorithm.iteration;
27 import std.algorithm.searching;
28 import std.experimental.allocator : makeArray, make;
29 import std.string;
30 import std.traits : Unqual;
31 
32 import btdu.alloc;
33 
34 import containers.hashmap;
35 import containers.internal.hash : generateHash;
36 
37 /// Common definitions for a deduplicated trie for paths.
38 mixin template SimplePath()
39 {
40 	// Size selected empirically
41 	alias NameString = InlineString!23;
42 
43 	/// Parent directory
44 	typeof(this)* parent;
45 	/// Directory items, if any
46 	typeof(this)* firstChild;
47 	/// Next item in the parent directory, if any
48 	typeof(this)* nextSibling;
49 	/// Base name
50 	/// Names prefixed with a NUL character indicate "special" nodes,
51 	/// which do not correspond to a filesystem path.
52 	immutable NameString name;
53 
54 	/*private*/ this(typeof(this)* parent, NameString name)
55 	{
56 		this.parent = parent;
57 		this.name = name;
58 	}
59 
60 	// Returns pointer to pointer to child, or pointer to where it should be added.
61 	private inout(typeof(this)*)* find(in char[] name) inout
62 	{
63 		inout(typeof(this)*)* child;
64 		for (child = &firstChild; *child; child = &(*child).nextSibling)
65 			if ((*child).name[] == name)
66 				break;
67 		return child;
68 	}
69 
70 	inout(typeof(this)*) opBinaryRight(string op : "in")(in char[] name) inout { return *find(name); }
71 	ref inout(typeof(this)) opIndex(in char[] name) inout { return *(name in this); }
72 
73 	invariant
74 	{
75 		if (name)
76 		{
77 			assert(parent !is null, "Named node without parent");
78 			// assert((*parent)[name.toString()] is &this, "Child/parent mismatch");
79 		}
80 		else // root
81 			assert(!parent, "Unnamed node with parent");
82 	}
83 
84 	/// Append a single path segment to this one.
85 	typeof(this)* appendName(in char[] name)
86 	{
87 		assert(name.length, "Empty path segment");
88 		assert(name.indexOf('/') < 0, "Path segment contains /: " ~ name);
89 		auto ppnext = find(name);
90 		if (auto pnext = *ppnext)
91 			return pnext;
92 		else
93 			return *ppnext = growAllocator.make!(typeof(this))(&this, NameString(name));
94 	}
95 
96 	/// ditto
97 	private typeof(this)* appendName(NameString name)
98 	{
99 		auto ppnext = find(name[]);
100 		if (auto pnext = *ppnext)
101 			return pnext;
102 		else
103 			return *ppnext = growAllocator.make!(typeof(this))(&this, name);
104 	}
105 
106 	/// Append a normalized relative string path to this one.
107 	typeof(this)* appendPath(in char[] path)
108 	{
109 		auto p = path.indexOf('/');
110 		auto nextName = p < 0 ? path : path[0 .. p];
111 		auto next = appendName(nextName);
112 		if (p < 0)
113 			return next;
114 		else
115 			return next.appendPath(path[p + 1 .. $]);
116 	}
117 
118 	/// ditto
119 	typeof(this)* appendPath(in SubPath* path)
120 	{
121 		typeof(this)* recurse(typeof(this)* base, in SubPath* path)
122 		{
123 			if (!path.parent) // root
124 				return base;
125 			base = recurse(base, path.parent);
126 			return base.appendName(path.name);
127 		}
128 
129 		return recurse(&this, path);
130 	}
131 
132 	/// ditto
133 	typeof(this)* appendPath(in GlobalPath* path)
134 	{
135 		typeof(this)* recurse(typeof(this)* base, in GlobalPath* path)
136 		{
137 			if (path.parent)
138 				base = recurse(base, path.parent);
139 			return base.appendPath(path.subPath);
140 		}
141 
142 		return recurse(&this, path);
143 	}
144 
145 	/// Return an iterator for path fragments.
146 	/// Iterates from inner-most to top level.
147 	auto range() const
148 	{
149 		alias This = typeof(this)*;
150 		static struct Range
151 		{
152 			This p;
153 			bool empty() const { return !p; }
154 			string front() { return p.name[]; }
155 			void popFront() { p = p.parent; }
156 		}
157 		return Range(&this);
158 	}
159 
160 	void toString(scope void delegate(const(char)[]) sink) const
161 	{
162 		if (parent)
163 		{
164 			parent.toString(sink);
165 			sink("/");
166 		}
167 		sink(humanName);
168 	}
169 
170 	string humanName() const
171 	{
172 		string humanName = name[];
173 		humanName.skipOverNul();
174 		return humanName;
175 	}
176 }
177 
178 /// Common operations for linked-list-like path structures
179 mixin template PathCommon()
180 {
181 	/// Returns the total length of this path chain,
182 	/// including this instance.
183 	private size_t chainLength() const
184 	{
185 		return 1 + (parent ? parent.chainLength() : 0);
186 	}
187 
188 	/// Returns the common prefix of `paths`.
189 	/// Assumes that if two pointers are different, they point at different paths.
190 	/// Destructively mutates `paths` as scratch space.
191 	static typeof(this)* commonPrefix(typeof(this)*[] paths)
192 	{
193 		// First, calculate the lengths
194 		static FastAppender!size_t lengths;
195 		lengths.clear();
196 		foreach (ref path; paths)
197 			lengths.put(path.chainLength);
198 
199 		// Rewind all paths to the minimal path's length
200 		auto minLength = lengths.get().reduce!min;
201 		foreach (i, ref path; paths)
202 			while (lengths.get()[i] > minLength)
203 			{
204 				lengths.get()[i]--;
205 				path = path.parent;
206 			}
207 
208 		// Rewind all paths until the tip points at the same thing
209 		while (paths.any!(path => path !is paths[0]))
210 			foreach (ref path; paths)
211 				path = path.parent;
212 
213 		// All paths now point at the same thing.
214 		return paths[0];
215 	}
216 }
217 
218 /// Implements comparison for linked-list-like path structures.
219 /// Requires `PathCommon` and a `compareContents` definition.
220 mixin template PathCmp()
221 {
222 	int opCmp(const ref typeof(this) b) const
223 	{
224 		if (this is b)
225 			return 0;
226 
227 		// Because the lengths may be uneven, query them first
228 		auto aLength = this.chainLength();
229 		auto bLength = b   .chainLength();
230 		auto maxLength = max(aLength, bLength);
231 
232 		// We are starting from the tail end of two
233 		// linked lists with possibly different length
234 		int recurse(
235 			// The tail so far
236 			in typeof(this)*[2] paths,
237 			// How many nodes this side is "shorter" by
238 			size_t[2] rem,
239 		)
240 		{
241 			if (paths[0] is paths[1])
242 				return 0; // Also covers the [null, null] case which stops recursion
243 
244 			// What we will recurse with
245 			const(typeof(this))*[2] recPaths;
246 			size_t[2] recRem;
247 			// What we will compare in this step (if recursion returns 0)
248 			const(typeof(this))*[2] thisPaths;
249 
250 			foreach (n; 0 .. 2)
251 			{
252 				if (rem[n])
253 				{
254 					thisPaths[n] = null;
255 					recPaths[n] = paths[n];
256 					recRem[n] = rem[n] - 1;
257 				}
258 				else
259 				{
260 					thisPaths[n] = paths[n];
261 					recPaths[n] = paths[n].parent;
262 					recRem[n] = 0;
263 				}
264 			}
265 
266 			int res = recurse(recPaths, recRem);
267 			if (res)
268 				return res;
269 
270 			if ((thisPaths[0] is null) != (thisPaths[1] is null))
271 				return thisPaths[0] is null ? -1 : 1;
272 			return thisPaths[0].compareContents(*thisPaths[1]);
273 		}
274 		return recurse([&this, &b], [
275 			maxLength - aLength,
276 			maxLength - bLength,
277 		]);
278 	}
279 }
280 
281 /// Path within a tree (subvolume)
282 struct SubPath
283 {
284 	mixin SimplePath;
285 	mixin PathCommon;
286 	mixin PathCmp;
287 
288 	/// PathCmp implementation
289 	private int compareContents(const ref typeof(this) b) const
290 	{
291 		return cmp(name[], b.name[]);
292 	}
293 }
294 
295 /// Global path (spanning multiple trees)
296 /// This is to allow efficiently representing paths where the prefix
297 /// (subvolume path) varies, e.g.:
298 /// - /@root/usr/lib/libfoo.so.1.0.0
299 /// - /backups/@root-20200101000000/usr/lib/libfoo.so.1.0.0
300 /// - /backups/@root-20200102000000/usr/lib/libfoo.so.1.0.0
301 /// etc.
302 /// Here we can store /backups/@root-20200102000000 etc. as one
303 /// SubPath and /usr/lib/libfoo.so.1.0.0 as another, with the
304 /// GlobalPath representing a concatenation of the two.
305 struct GlobalPath
306 {
307 	GlobalPath* parent; /// Parent tree (or null if none)
308 	SubPath* subPath;   /// Path within this filesystem
309 
310 	void toString(scope void delegate(const(char)[]) sink) const
311 	{
312 		if (parent)
313 			parent.toString(sink);
314 		subPath.toString(sink);
315 	}
316 
317 	size_t length() const
318 	{
319 		size_t length = 0;
320 		toString((const(char)[] s) { length += s.length; });
321 		return length;
322 	}
323 
324 	/// PathCmp implementation
325 	private int compareContents(const ref typeof(this) b) const
326 	{
327 		return subPath.opCmp(*b.subPath);
328 	}
329 
330 	/// Return an iterator for subpaths.
331 	/// Iterates from inner-most to top level.
332 	auto range() const
333 	{
334 		static struct Range
335 		{
336 			const(GlobalPath)* p;
337 			bool empty() const { return !p; }
338 			const(SubPath)* front() { return p.subPath; }
339 			void popFront() { p = p.parent; }
340 		}
341 		return Range(&this);
342 	}
343 
344 	mixin PathCommon;
345 	mixin PathCmp;
346 }
347 
348 enum SampleType
349 {
350 	represented,
351 	exclusive,
352 	shared_,
353 }
354 
355 /// Browser path (GUI hierarchy)
356 struct BrowserPath
357 {
358 	mixin SimplePath;
359 	mixin PathCommon;
360 
361 	struct Data
362 	{
363 		ulong samples; /// For non-leaves, sum of leaves
364 		ulong duration; /// Total hnsecs
365 		ulong[3] logicalOffsets = -1; /// Examples (the last 3 seen) of logical offsets
366 	}
367 	Data[enumLength!SampleType] data;
368 	double distributedSamples = 0;
369 
370 	void addSample(SampleType type, ulong logicalOffset, ulong duration)
371 	{
372 		data[type].samples++;
373 		data[type].duration += duration;
374 		if (logicalOffset != data[type].logicalOffsets[$-1])
375 			foreach (i, ref l0; data[type].logicalOffsets)
376 				l0 = i + 1 == Data.logicalOffsets.length ? logicalOffset : data[type].logicalOffsets[i + 1];
377 		if (parent)
378 			parent.addSample(type, logicalOffset, duration);
379 	}
380 
381 	void addDistributedSample(double share)
382 	{
383 		distributedSamples += share;
384 		if (parent)
385 			parent.addDistributedSample(share);
386 	}
387 
388 	/// Other paths this address is reachable via,
389 	/// and samples seen from those addresses
390 	HashMap!(GlobalPath, size_t, CasualAllocator, generateHash!GlobalPath, false, false) seenAs;
391 }
392 
393 // We prefix "special" names with one NUL character to
394 // distinguish them from filesystem entries.
395 bool skipOverNul(C)(ref C[] str)
396 {
397 	// Workaround for https://issues.dlang.org/show_bug.cgi?id=22302
398 	if (str.startsWith("\0"))
399 	{
400 		str = str[1 .. $];
401 		return true;
402 	}
403 	return false;
404 }
405 
406 /// Inline string type.
407 alias InlineString(size_t size) = InlineArr!(immutable(char), size);
408 
409 union InlineArr(T, size_t size)
410 {
411 private:
412 	static assert(size * T.sizeof > T[].sizeof);
413 	alias InlineSize = ubyte;
414 	static assert(size < InlineSize.max);
415 
416 	T[] str;
417 	struct
418 	{
419 		T[size] inlineBuf;
420 		InlineSize inlineLength;
421 	}
422 
423 public:
424 	this(in Unqual!T[] s)
425 	{
426 		if (s.length > size)
427 			str = growAllocator.makeArray!T(s[]);
428 		else
429 		{
430 			inlineBuf[0 .. s.length] = s;
431 			inlineLength = cast(InlineSize)s.length;
432 		}
433 	}
434 
435 	inout(T)[] opSlice() inout
436 	{
437 		if (inlineLength)
438 			return inlineBuf[0 .. inlineLength];
439 		else
440 			return str;
441 	}
442 
443 	bool opCast(T : bool)() const { return this !is typeof(this).init; }
444 
445 	bool opEquals(ref const InlineArr other) const
446 	{
447 		return this[] == other[];
448 	}
449 }