1 /*
2  * Copyright (C) 2020, 2021, 2022  Vladimir Panteleev <btdu@cy.md>
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 
19 /// Path manipulation and storage
20 module btdu.paths;
21 
22 import std.algorithm.comparison;
23 import std.algorithm.iteration;
24 import std.algorithm.searching;
25 import std.array : array;
26 import std.experimental.allocator : makeArray, make;
27 import std.string;
28 import std.traits : Unqual, EnumMembers;
29 
30 import ae.utils.appender;
31 import ae.utils.json : JSONName, JSONOptional, JSONFragment;
32 import ae.utils.meta;
33 
34 import btdu.alloc;
35 
36 import containers.hashmap;
37 import containers.internal.hash : generateHash;
38 
39 /// Common definitions for a deduplicated trie for paths.
40 mixin template SimplePath()
41 {
42 	// Size selected empirically
43 	alias NameString = InlineString!23;
44 
45 	/// Parent directory
46 	typeof(this)* parent;
47 	/// Directory items, if any
48 	typeof(this)* firstChild;
49 	/// Next item in the parent directory, if any
50 	typeof(this)* nextSibling;
51 	/// Base name
52 	/// Names prefixed with a NUL character indicate "special" nodes,
53 	/// which do not correspond to a filesystem path.
54 	immutable NameString name;
55 
56 	/*private*/ this(typeof(this)* parent, NameString name)
57 	{
58 		this.parent = parent;
59 		this.name = name;
60 	}
61 
62 	// Returns pointer to pointer to child, or pointer to where it should be added.
63 	private inout(typeof(this)*)* find(in char[] name) inout
64 	{
65 		inout(typeof(this)*)* child;
66 		for (child = &firstChild; *child; child = &(*child).nextSibling)
67 			if ((*child).name[] == name)
68 				break;
69 		return child;
70 	}
71 
72 	inout(typeof(this)*) opBinaryRight(string op : "in")(in char[] name) inout { return *find(name); }
73 	ref inout(typeof(this)) opIndex(in char[] name) inout { return *(name in this); }
74 
75 	debug invariant
76 	{
77 		import btdu.state : importing;
78 		if (importing)
79 			return;
80 		if (name)
81 		{
82 			assert(parent !is null, "Named node without parent");
83 			// assert((*parent)[name.toString()] is &this, "Child/parent mismatch");
84 		}
85 		else // root
86 			assert(!parent, "Unnamed node with parent");
87 	}
88 
89 	/// Append a single path segment to this one.
90 	typeof(this)* appendName(in char[] name)
91 	{
92 		assert(name.length, "Empty path segment");
93 		assert(name.indexOf('/') < 0, "Path segment contains /: " ~ name);
94 		auto ppnext = find(name);
95 		if (auto pnext = *ppnext)
96 			return pnext;
97 		else
98 			return *ppnext = growAllocator.make!(typeof(this))(&this, NameString(name));
99 	}
100 
101 	/// ditto
102 	private typeof(this)* appendName(NameString name)
103 	{
104 		auto ppnext = find(name[]);
105 		if (auto pnext = *ppnext)
106 			return pnext;
107 		else
108 			return *ppnext = growAllocator.make!(typeof(this))(&this, name);
109 	}
110 
111 	/// Append a normalized relative string path to this one.
112 	typeof(this)* appendPath(in char[] path)
113 	{
114 		auto p = path.indexOf('/');
115 		auto nextName = p < 0 ? path : path[0 .. p];
116 		auto next = appendName(nextName);
117 		if (p < 0)
118 			return next;
119 		else
120 			return next.appendPath(path[p + 1 .. $]);
121 	}
122 
123 	/// ditto
124 	typeof(this)* appendPath(in SubPath* path)
125 	{
126 		typeof(this)* recurse(typeof(this)* base, in SubPath* path)
127 		{
128 			if (!path.parent) // root
129 				return base;
130 			base = recurse(base, path.parent);
131 			return base.appendName(path.name);
132 		}
133 
134 		return recurse(&this, path);
135 	}
136 
137 	/// ditto
138 	typeof(this)* appendPath(in GlobalPath* path)
139 	{
140 		typeof(this)* recurse(typeof(this)* base, in GlobalPath* path)
141 		{
142 			if (path.parent)
143 				base = recurse(base, path.parent);
144 			return base.appendPath(path.subPath);
145 		}
146 
147 		return recurse(&this, path);
148 	}
149 
150 	/// Perform the reverse operation, returning a parent path,
151 	/// or `null` if `path` is not a suffix of `this`.
152 	typeof(this)* unappendPath(in SubPath* path)
153 	{
154 		typeof(this)* recurse(typeof(this)* base, in SubPath* path)
155 		{
156 			if (!path.parent) // root
157 				return base;
158 			if (!base.parent)
159 				return null;
160 			if (path.name[] != base.name[])
161 				return null;
162 			return recurse(base.parent, path.parent);
163 		}
164 
165 		return recurse(&this, path);
166 	}
167 
168 	/// ditto
169 	typeof(this)* unappendPath(in GlobalPath* path)
170 	{
171 		typeof(this)* recurse(typeof(this)* base, in GlobalPath* path)
172 		{
173 			if (!path) // root
174 				return base;
175 			base = base.unappendPath(path.subPath);
176 			if (!base)
177 				return null;
178 			return recurse(base, path.parent);
179 		}
180 
181 		return recurse(&this, path);
182 	}
183 
184 	/// Return an iterator for path fragments.
185 	/// Iterates from inner-most to top level.
186 	auto range() const
187 	{
188 		alias This = typeof(this)*;
189 		static struct Range
190 		{
191 			This p;
192 			bool empty() const { return !p; }
193 			string front() { return p.name[]; }
194 			void popFront() { p = p.parent; }
195 		}
196 		return Range(&this);
197 	}
198 
199 	void toString(scope void delegate(const(char)[]) sink) const
200 	{
201 		if (parent)
202 		{
203 			parent.toString(sink);
204 			sink("/");
205 		}
206 		sink(humanName);
207 	}
208 
209 	string humanName() const
210 	{
211 		string humanName = name[];
212 		humanName.skipOverNul();
213 		return humanName;
214 	}
215 }
216 
217 /// Common operations for linked-list-like path structures
218 mixin template PathCommon()
219 {
220 	/// Returns the total length of this path chain,
221 	/// including this instance.
222 	private size_t chainLength() const
223 	{
224 		return 1 + (parent ? parent.chainLength() : 0);
225 	}
226 
227 	/// Returns the common prefix of `paths`.
228 	/// Assumes that if two pointers are different, they point at different paths.
229 	/// Destructively mutates `paths` as scratch space.
230 	static typeof(this)* commonPrefix(typeof(this)*[] paths)
231 	{
232 		// First, calculate the lengths
233 		static FastAppender!size_t lengths;
234 		lengths.clear();
235 		foreach (ref path; paths)
236 			lengths.put(path.chainLength);
237 
238 		// Rewind all paths to the minimal path's length
239 		auto minLength = lengths.get().reduce!min;
240 		foreach (i, ref path; paths)
241 			while (lengths.get()[i] > minLength)
242 			{
243 				lengths.get()[i]--;
244 				path = path.parent;
245 			}
246 
247 		// Rewind all paths until the tip points at the same thing
248 		while (paths.any!(path => path !is paths[0]))
249 			foreach (ref path; paths)
250 				path = path.parent;
251 
252 		// All paths now point at the same thing.
253 		return paths[0];
254 	}
255 }
256 
257 /// Implements comparison for linked-list-like path structures.
258 /// Requires `PathCommon` and a `compareContents` definition.
259 mixin template PathCmp()
260 {
261 	int opCmp(const ref typeof(this) b) const
262 	{
263 		if (this is b)
264 			return 0;
265 
266 		// Because the lengths may be uneven, query them first
267 		auto aLength = this.chainLength();
268 		auto bLength = b   .chainLength();
269 		auto maxLength = max(aLength, bLength);
270 
271 		// We are starting from the tail end of two
272 		// linked lists with possibly different length
273 		int recurse(
274 			// The tail so far
275 			in typeof(this)*[2] paths,
276 			// How many nodes this side is "shorter" by
277 			size_t[2] rem,
278 		)
279 		{
280 			if (paths[0] is paths[1])
281 				return 0; // Also covers the [null, null] case which stops recursion
282 
283 			// What we will recurse with
284 			const(typeof(this))*[2] recPaths;
285 			size_t[2] recRem;
286 			// What we will compare in this step (if recursion returns 0)
287 			const(typeof(this))*[2] thisPaths;
288 
289 			foreach (n; 0 .. 2)
290 			{
291 				if (rem[n])
292 				{
293 					thisPaths[n] = null;
294 					recPaths[n] = paths[n];
295 					recRem[n] = rem[n] - 1;
296 				}
297 				else
298 				{
299 					thisPaths[n] = paths[n];
300 					recPaths[n] = paths[n].parent;
301 					recRem[n] = 0;
302 				}
303 			}
304 
305 			int res = recurse(recPaths, recRem);
306 			if (res)
307 				return res;
308 
309 			if ((thisPaths[0] is null) != (thisPaths[1] is null))
310 				return thisPaths[0] is null ? -1 : 1;
311 			return thisPaths[0].compareContents(*thisPaths[1]);
312 		}
313 		return recurse([&this, &b], [
314 			maxLength - aLength,
315 			maxLength - bLength,
316 		]);
317 	}
318 }
319 
320 /// Path within a tree (subvolume)
321 struct SubPath
322 {
323 	mixin SimplePath;
324 	mixin PathCommon;
325 	mixin PathCmp;
326 
327 	/// PathCmp implementation
328 	private int compareContents(const ref typeof(this) b) const
329 	{
330 		return cmp(name[], b.name[]);
331 	}
332 }
333 
334 /// Global path (spanning multiple trees)
335 /// This is to allow efficiently representing paths where the prefix
336 /// (subvolume path) varies, e.g.:
337 /// - /@root/usr/lib/libfoo.so.1.0.0
338 /// - /backups/@root-20200101000000/usr/lib/libfoo.so.1.0.0
339 /// - /backups/@root-20200102000000/usr/lib/libfoo.so.1.0.0
340 /// etc.
341 /// Here we can store /backups/@root-20200102000000 etc. as one
342 /// SubPath and /usr/lib/libfoo.so.1.0.0 as another, with the
343 /// GlobalPath representing a concatenation of the two.
344 struct GlobalPath
345 {
346 	GlobalPath* parent; /// Parent tree (or null if none)
347 	SubPath* subPath;   /// Path within this filesystem
348 
349 	void toString(scope void delegate(const(char)[]) sink) const
350 	{
351 		if (parent)
352 			parent.toString(sink);
353 		subPath.toString(sink);
354 	}
355 
356 	size_t length() const
357 	{
358 		size_t length = 0;
359 		toString((const(char)[] s) { length += s.length; });
360 		return length;
361 	}
362 
363 	/// PathCmp implementation
364 	private int compareContents(const ref typeof(this) b) const
365 	{
366 		return subPath.opCmp(*b.subPath);
367 	}
368 
369 	/// Return an iterator for subpaths.
370 	/// Iterates from inner-most to top level.
371 	auto range() const
372 	{
373 		static struct Range
374 		{
375 			const(GlobalPath)* p;
376 			bool empty() const { return !p; }
377 			const(SubPath)* front() { return p.subPath; }
378 			void popFront() { p = p.parent; }
379 		}
380 		return Range(&this);
381 	}
382 
383 	mixin PathCommon;
384 	mixin PathCmp;
385 }
386 
387 enum SampleType
388 {
389 	represented,
390 	exclusive,
391 	shared_,
392 }
393 
394 /// Browser path (GUI hierarchy)
395 struct BrowserPath
396 {
397 	mixin SimplePath;
398 	mixin PathCommon;
399 
400 	struct Data
401 	{
402 		ulong samples; /// For non-leaves, sum of leaves
403 		ulong duration; /// Total hnsecs
404 		ulong[3] logicalOffsets = -1; /// Examples (the last 3 seen) of logical offsets
405 	}
406 	Data[enumLength!SampleType] data;
407 	double distributedSamples = 0;
408 	bool deleting;
409 
410 	void addSample(SampleType type, ulong logicalOffset, ulong duration)
411 	{
412 		addSamples(type, 1, (&logicalOffset)[0..1], duration);
413 	}
414 
415 	void addSamples(SampleType type, ulong samples, ulong[] logicalOffsets, ulong duration)
416 	{
417 		data[type].samples += samples;
418 		data[type].duration += duration;
419 		foreach (logicalOffset; logicalOffsets)
420 			if (logicalOffset != data[type].logicalOffsets[$-1])
421 				foreach (i, ref l0; data[type].logicalOffsets)
422 					l0 = i + 1 == Data.logicalOffsets.length ? logicalOffset : data[type].logicalOffsets[i + 1];
423 		if (parent)
424 			parent.addSamples(type, samples, logicalOffsets, duration);
425 	}
426 
427 	void removeSamples(SampleType type, ulong samples, ulong[] logicalOffsets, ulong duration)
428 	{
429 		assert(samples <= data[type].samples && duration <= data[type].duration);
430 		data[type].samples -= samples;
431 		data[type].duration -= duration;
432 		foreach (i, lMy; data[type].logicalOffsets)
433 			if (logicalOffsets.canFind(lMy))
434 				foreach_reverse (j; 0 .. i + 1)
435 					data[type].logicalOffsets = j == 0 ? -1 : data[type].logicalOffsets[j + 1];
436 		if (parent)
437 			parent.removeSamples(type, samples, logicalOffsets, duration);
438 	}
439 
440 	void addDistributedSample(double share)
441 	{
442 		distributedSamples += share;
443 		if (parent)
444 			parent.addDistributedSample(share);
445 	}
446 
447 	void removeDistributedSample(double share)
448 	{
449 		addDistributedSample(-share);
450 	}
451 
452 	/// Other paths this address is reachable via,
453 	/// and samples seen from those addresses
454 	HashMap!(GlobalPath, size_t, CasualAllocator, generateHash!GlobalPath, false, false) seenAs;
455 
456 	/// Serialized representation
457 	struct SerializedForm
458 	{
459 		string name;
460 
461 		struct SampleData
462 		{
463 			// Same order as SampleType
464 			@JSONOptional Data represented;
465 			@JSONOptional Data exclusive;
466 			@JSONName("shared")
467 			@JSONOptional Data shared_;
468 			@JSONOptional JSONFragment distributedSamples = JSONFragment("0");
469 		}
470 		SampleData data;
471 
472 		BrowserPath*[] children;
473 	}
474 
475 	SerializedForm toJSON()
476 	{
477 		SerializedForm s;
478 		s.name = this.name[];
479 		for (auto p = firstChild; p; p = p.nextSibling)
480 			s.children ~= p;
481 		static foreach (sampleType; EnumMembers!SampleType)
482 			s.data.tupleof[sampleType] = data[sampleType];
483 		if (this.distributedSamples !is 0.)
484 			s.data.distributedSamples.json = this.distributedSamples.format!"%17e";
485 		return s;
486 	}
487 
488 	static BrowserPath fromJSON(ref SerializedForm s)
489 	{
490 		import std.conv : to;
491 
492 		auto p = BrowserPath(null, NameString(s.name));
493 		foreach_reverse (child; s.children)
494 		{
495 			child.nextSibling = p.firstChild;
496 			p.firstChild = child;
497 		}
498 		static foreach (sampleType; EnumMembers!SampleType)
499 			p.data[sampleType] = s.data.tupleof[sampleType];
500 		p.distributedSamples = s.data.distributedSamples.json.strip.to!double;
501 		return p;
502 	}
503 
504 	void resetParents()
505 	{
506 		for (auto p = firstChild; p; p = p.nextSibling)
507 		{
508 			p.parent = &this;
509 			p.resetParents();
510 		}
511 	}
512 
513 	/// Approximate the effect of deleting the filesystem object represented by the path.
514 	void remove()
515 	{
516 		assert(parent);
517 
518 		// Mark this subtree for deletion, to aid the rebalancing below.
519 		markForDeletion();
520 
521 		// Rebalance the hierarchy's statistics by updating and moving sample data as needed.
522 		evict();
523 
524 		// Unlink this node, removing it from the tree.
525 		{
526 			auto pp = parent.find(this.name[]);
527 			assert(*pp == &this);
528 			*pp = this.nextSibling;
529 		}
530 	}
531 
532 	// Mark this subtree for deletion, to aid the rebalancing below.
533 	private void markForDeletion()
534 	{
535 		deleting = true;
536 		for (auto p = firstChild; p; p = p.nextSibling)
537 			p.markForDeletion();
538 	}
539 
540 	/// Clear all samples or move them elsewhere.
541 	private void evict()
542 	{
543 		assert(parent);
544 
545 		// Evict children first
546 		for (auto p = firstChild; p; p = p.nextSibling)
547 			p.evict();
548 
549 		// Save this node's remaining stats before we remove them.
550 		auto data = this.data;
551 		auto distributedSamples = this.distributedSamples;
552 
553 		// Remove sample data from this node and its parents.
554 		// After recursion, for non-leaf nodes, most of these should now be at zero (as far as we can estimate).
555 		static foreach (sampleType; EnumMembers!SampleType)
556 			if (data[sampleType].samples) // avoid quadratic complexity
557 				removeSamples(sampleType, data[sampleType].samples, data[sampleType].logicalOffsets[], data[sampleType].duration);
558 		if (distributedSamples) // avoid quadratic complexity
559 			removeDistributedSample(distributedSamples);
560 
561 		if (seenAs.empty)
562 			return;  // Directory (non-leaf) node - nothing else to do here.
563 
564 		// For leaf nodes, some stats can be redistributed to other references.
565 		// We need to do some path calculations first,
566 		// such as inferring the GlobalPath from the BrowserPath and seenAs.
567 		BrowserPath* root;
568 		foreach (ref otherPath; seenAs.byKey)
569 		{
570 			root = this.unappendPath(&otherPath);
571 			if (root)
572 				break;
573 		}
574 		debug assert(root);
575 		if (!root)
576 			return;
577 
578 		// These paths will inherit the remains.
579 		auto remainingPaths = seenAs.byKey
580 			.filter!(otherPath => !root.appendPath(&otherPath).deleting)
581 			.array;
582 
583 		// Redistribute to siblings
584 		if (!remainingPaths.empty)
585 		{
586 			auto newRepresentativePath = selectRepresentativePath(remainingPaths);
587 			foreach (ref remainingPath; remainingPaths)
588 			{
589 				// Redistribute samples
590 				if (remainingPath == newRepresentativePath)
591 					root.appendPath(&remainingPath).addSamples(
592 						SampleType.represented,
593 						data[SampleType.represented].samples,
594 						data[SampleType.represented].logicalOffsets[],
595 						data[SampleType.represented].duration,
596 					);
597 				if (distributedSamples)
598 					root.appendPath(&remainingPath).addDistributedSample(distributedSamples / remainingPaths.length);
599 			}
600 		}
601 	}
602 }
603 
604 GlobalPath selectRepresentativePath(GlobalPath[] paths)
605 {
606 	return paths.fold!((a, b) {
607 		// Prefer paths with resolved roots
608 		auto aResolved = a.isResolved();
609 		auto bResolved = b.isResolved();
610 		if (aResolved != bResolved)
611 			return aResolved ? a : b;
612 		// Shortest path always wins
613 		auto aLength = a.length;
614 		auto bLength = b.length;
615 		if (aLength != bLength)
616 			return aLength < bLength ? a : b;
617 		// If the length is the same, pick the lexicographically smallest one
618 		return a < b ? a : b;
619 	})();
620 }
621 
622 private bool isResolved(ref GlobalPath p)
623 {
624 	return !p.range
625 		.map!(g => g.range)
626 		.joiner
627 		.canFind!(n => n.startsWith("\0TREE_"));
628 }
629 
630 // We prefix "special" names with one NUL character to
631 // distinguish them from filesystem entries.
632 bool skipOverNul(C)(ref C[] str)
633 {
634 	// Workaround for https://issues.dlang.org/show_bug.cgi?id=22302
635 	if (str.startsWith("\0"))
636 	{
637 		str = str[1 .. $];
638 		return true;
639 	}
640 	return false;
641 }
642 
643 /// Inline string type.
644 alias InlineString(size_t size) = InlineArr!(immutable(char), size);
645 
646 union InlineArr(T, size_t size)
647 {
648 private:
649 	static assert(size * T.sizeof > T[].sizeof);
650 	alias InlineSize = ubyte;
651 	static assert(size < InlineSize.max);
652 
653 	T[] str;
654 	struct
655 	{
656 		T[size] inlineBuf;
657 		InlineSize inlineLength;
658 	}
659 
660 public:
661 	this(in Unqual!T[] s)
662 	{
663 		if (s.length > size)
664 			str = growAllocator.makeArray!T(s[]);
665 		else
666 		{
667 			inlineBuf[0 .. s.length] = s;
668 			inlineLength = cast(InlineSize)s.length;
669 		}
670 	}
671 
672 	inout(T)[] opSlice() inout
673 	{
674 		if (inlineLength)
675 			return inlineBuf[0 .. inlineLength];
676 		else
677 			return str;
678 	}
679 
680 	bool opCast(T : bool)() const { return this !is typeof(this).init; }
681 
682 	bool opEquals(ref const InlineArr other) const
683 	{
684 		return this[] == other[];
685 	}
686 }