1 /* 2 * Copyright (C) 2020, 2021, 2022 Vladimir Panteleev <btdu@cy.md> 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 /// Path manipulation and storage 20 module btdu.paths; 21 22 import std.algorithm.comparison; 23 import std.algorithm.iteration; 24 import std.algorithm.searching; 25 import std.array : array; 26 import std.experimental.allocator : makeArray, make; 27 import std.string; 28 import std.traits : Unqual, EnumMembers; 29 30 import ae.utils.appender; 31 import ae.utils.json : JSONName, JSONOptional, JSONFragment; 32 import ae.utils.meta; 33 34 import btdu.alloc; 35 36 import containers.hashmap; 37 import containers.internal.hash : generateHash; 38 39 /// Common definitions for a deduplicated trie for paths. 40 mixin template SimplePath() 41 { 42 // Size selected empirically 43 alias NameString = InlineString!23; 44 45 /// Parent directory 46 typeof(this)* parent; 47 /// Directory items, if any 48 typeof(this)* firstChild; 49 /// Next item in the parent directory, if any 50 typeof(this)* nextSibling; 51 /// Base name 52 /// Names prefixed with a NUL character indicate "special" nodes, 53 /// which do not correspond to a filesystem path. 54 immutable NameString name; 55 56 /*private*/ this(typeof(this)* parent, NameString name) 57 { 58 this.parent = parent; 59 this.name = name; 60 } 61 62 // Returns pointer to pointer to child, or pointer to where it should be added. 63 private inout(typeof(this)*)* find(in char[] name) inout 64 { 65 inout(typeof(this)*)* child; 66 for (child = &firstChild; *child; child = &(*child).nextSibling) 67 if ((*child).name[] == name) 68 break; 69 return child; 70 } 71 72 inout(typeof(this)*) opBinaryRight(string op : "in")(in char[] name) inout { return *find(name); } 73 ref inout(typeof(this)) opIndex(in char[] name) inout { return *(name in this); } 74 75 debug invariant 76 { 77 import btdu.state : importing; 78 if (importing) 79 return; 80 if (name) 81 { 82 assert(parent !is null, "Named node without parent"); 83 // assert((*parent)[name.toString()] is &this, "Child/parent mismatch"); 84 } 85 else // root 86 assert(!parent, "Unnamed node with parent"); 87 } 88 89 /// Append a single path segment to this one. 90 typeof(this)* appendName(in char[] name) 91 { 92 assert(name.length, "Empty path segment"); 93 assert(name.indexOf('/') < 0, "Path segment contains /: " ~ name); 94 auto ppnext = find(name); 95 if (auto pnext = *ppnext) 96 return pnext; 97 else 98 return *ppnext = growAllocator.make!(typeof(this))(&this, NameString(name)); 99 } 100 101 /// ditto 102 private typeof(this)* appendName(NameString name) 103 { 104 auto ppnext = find(name[]); 105 if (auto pnext = *ppnext) 106 return pnext; 107 else 108 return *ppnext = growAllocator.make!(typeof(this))(&this, name); 109 } 110 111 /// Append a normalized relative string path to this one. 112 typeof(this)* appendPath(in char[] path) 113 { 114 auto p = path.indexOf('/'); 115 auto nextName = p < 0 ? path : path[0 .. p]; 116 auto next = appendName(nextName); 117 if (p < 0) 118 return next; 119 else 120 return next.appendPath(path[p + 1 .. $]); 121 } 122 123 /// ditto 124 typeof(this)* appendPath(in SubPath* path) 125 { 126 typeof(this)* recurse(typeof(this)* base, in SubPath* path) 127 { 128 if (!path.parent) // root 129 return base; 130 base = recurse(base, path.parent); 131 return base.appendName(path.name); 132 } 133 134 return recurse(&this, path); 135 } 136 137 /// ditto 138 typeof(this)* appendPath(in GlobalPath* path) 139 { 140 typeof(this)* recurse(typeof(this)* base, in GlobalPath* path) 141 { 142 if (path.parent) 143 base = recurse(base, path.parent); 144 return base.appendPath(path.subPath); 145 } 146 147 return recurse(&this, path); 148 } 149 150 /// Perform the reverse operation, returning a parent path, 151 /// or `null` if `path` is not a suffix of `this`. 152 typeof(this)* unappendPath(in SubPath* path) 153 { 154 typeof(this)* recurse(typeof(this)* base, in SubPath* path) 155 { 156 if (!path.parent) // root 157 return base; 158 if (!base.parent) 159 return null; 160 if (path.name[] != base.name[]) 161 return null; 162 return recurse(base.parent, path.parent); 163 } 164 165 return recurse(&this, path); 166 } 167 168 /// ditto 169 typeof(this)* unappendPath(in GlobalPath* path) 170 { 171 typeof(this)* recurse(typeof(this)* base, in GlobalPath* path) 172 { 173 if (!path) // root 174 return base; 175 base = base.unappendPath(path.subPath); 176 if (!base) 177 return null; 178 return recurse(base, path.parent); 179 } 180 181 return recurse(&this, path); 182 } 183 184 /// Return an iterator for path fragments. 185 /// Iterates from inner-most to top level. 186 auto range() const 187 { 188 alias This = typeof(this)*; 189 static struct Range 190 { 191 This p; 192 bool empty() const { return !p; } 193 string front() { return p.name[]; } 194 void popFront() { p = p.parent; } 195 } 196 return Range(&this); 197 } 198 199 void toString(scope void delegate(const(char)[]) sink) const 200 { 201 if (parent) 202 { 203 parent.toString(sink); 204 sink("/"); 205 } 206 sink(humanName); 207 } 208 209 string humanName() const 210 { 211 string humanName = name[]; 212 humanName.skipOverNul(); 213 return humanName; 214 } 215 } 216 217 /// Common operations for linked-list-like path structures 218 mixin template PathCommon() 219 { 220 /// Returns the total length of this path chain, 221 /// including this instance. 222 private size_t chainLength() const 223 { 224 return 1 + (parent ? parent.chainLength() : 0); 225 } 226 227 /// Returns the common prefix of `paths`. 228 /// Assumes that if two pointers are different, they point at different paths. 229 /// Destructively mutates `paths` as scratch space. 230 static typeof(this)* commonPrefix(typeof(this)*[] paths) 231 { 232 // First, calculate the lengths 233 static FastAppender!size_t lengths; 234 lengths.clear(); 235 foreach (ref path; paths) 236 lengths.put(path.chainLength); 237 238 // Rewind all paths to the minimal path's length 239 auto minLength = lengths.get().reduce!min; 240 foreach (i, ref path; paths) 241 while (lengths.get()[i] > minLength) 242 { 243 lengths.get()[i]--; 244 path = path.parent; 245 } 246 247 // Rewind all paths until the tip points at the same thing 248 while (paths.any!(path => path !is paths[0])) 249 foreach (ref path; paths) 250 path = path.parent; 251 252 // All paths now point at the same thing. 253 return paths[0]; 254 } 255 } 256 257 /// Implements comparison for linked-list-like path structures. 258 /// Requires `PathCommon` and a `compareContents` definition. 259 mixin template PathCmp() 260 { 261 int opCmp(const ref typeof(this) b) const 262 { 263 if (this is b) 264 return 0; 265 266 // Because the lengths may be uneven, query them first 267 auto aLength = this.chainLength(); 268 auto bLength = b .chainLength(); 269 auto maxLength = max(aLength, bLength); 270 271 // We are starting from the tail end of two 272 // linked lists with possibly different length 273 int recurse( 274 // The tail so far 275 in typeof(this)*[2] paths, 276 // How many nodes this side is "shorter" by 277 size_t[2] rem, 278 ) 279 { 280 if (paths[0] is paths[1]) 281 return 0; // Also covers the [null, null] case which stops recursion 282 283 // What we will recurse with 284 const(typeof(this))*[2] recPaths; 285 size_t[2] recRem; 286 // What we will compare in this step (if recursion returns 0) 287 const(typeof(this))*[2] thisPaths; 288 289 foreach (n; 0 .. 2) 290 { 291 if (rem[n]) 292 { 293 thisPaths[n] = null; 294 recPaths[n] = paths[n]; 295 recRem[n] = rem[n] - 1; 296 } 297 else 298 { 299 thisPaths[n] = paths[n]; 300 recPaths[n] = paths[n].parent; 301 recRem[n] = 0; 302 } 303 } 304 305 int res = recurse(recPaths, recRem); 306 if (res) 307 return res; 308 309 if ((thisPaths[0] is null) != (thisPaths[1] is null)) 310 return thisPaths[0] is null ? -1 : 1; 311 return thisPaths[0].compareContents(*thisPaths[1]); 312 } 313 return recurse([&this, &b], [ 314 maxLength - aLength, 315 maxLength - bLength, 316 ]); 317 } 318 } 319 320 /// Path within a tree (subvolume) 321 struct SubPath 322 { 323 mixin SimplePath; 324 mixin PathCommon; 325 mixin PathCmp; 326 327 /// PathCmp implementation 328 private int compareContents(const ref typeof(this) b) const 329 { 330 return cmp(name[], b.name[]); 331 } 332 } 333 334 /// Global path (spanning multiple trees) 335 /// This is to allow efficiently representing paths where the prefix 336 /// (subvolume path) varies, e.g.: 337 /// - /@root/usr/lib/libfoo.so.1.0.0 338 /// - /backups/@root-20200101000000/usr/lib/libfoo.so.1.0.0 339 /// - /backups/@root-20200102000000/usr/lib/libfoo.so.1.0.0 340 /// etc. 341 /// Here we can store /backups/@root-20200102000000 etc. as one 342 /// SubPath and /usr/lib/libfoo.so.1.0.0 as another, with the 343 /// GlobalPath representing a concatenation of the two. 344 struct GlobalPath 345 { 346 GlobalPath* parent; /// Parent tree (or null if none) 347 SubPath* subPath; /// Path within this filesystem 348 349 void toString(scope void delegate(const(char)[]) sink) const 350 { 351 if (parent) 352 parent.toString(sink); 353 subPath.toString(sink); 354 } 355 356 size_t length() const 357 { 358 size_t length = 0; 359 toString((const(char)[] s) { length += s.length; }); 360 return length; 361 } 362 363 /// PathCmp implementation 364 private int compareContents(const ref typeof(this) b) const 365 { 366 return subPath.opCmp(*b.subPath); 367 } 368 369 /// Return an iterator for subpaths. 370 /// Iterates from inner-most to top level. 371 auto range() const 372 { 373 static struct Range 374 { 375 const(GlobalPath)* p; 376 bool empty() const { return !p; } 377 const(SubPath)* front() { return p.subPath; } 378 void popFront() { p = p.parent; } 379 } 380 return Range(&this); 381 } 382 383 mixin PathCommon; 384 mixin PathCmp; 385 } 386 387 enum SampleType 388 { 389 represented, 390 exclusive, 391 shared_, 392 } 393 394 /// Browser path (GUI hierarchy) 395 struct BrowserPath 396 { 397 mixin SimplePath; 398 mixin PathCommon; 399 400 struct Data 401 { 402 ulong samples; /// For non-leaves, sum of leaves 403 ulong duration; /// Total hnsecs 404 ulong[3] logicalOffsets = -1; /// Examples (the last 3 seen) of logical offsets 405 } 406 Data[enumLength!SampleType] data; 407 double distributedSamples = 0; 408 bool deleting; 409 410 void addSample(SampleType type, ulong logicalOffset, ulong duration) 411 { 412 addSamples(type, 1, (&logicalOffset)[0..1], duration); 413 } 414 415 void addSamples(SampleType type, ulong samples, ulong[] logicalOffsets, ulong duration) 416 { 417 data[type].samples += samples; 418 data[type].duration += duration; 419 foreach (logicalOffset; logicalOffsets) 420 if (logicalOffset != data[type].logicalOffsets[$-1]) 421 foreach (i, ref l0; data[type].logicalOffsets) 422 l0 = i + 1 == Data.logicalOffsets.length ? logicalOffset : data[type].logicalOffsets[i + 1]; 423 if (parent) 424 parent.addSamples(type, samples, logicalOffsets, duration); 425 } 426 427 void removeSamples(SampleType type, ulong samples, ulong[] logicalOffsets, ulong duration) 428 { 429 assert(samples <= data[type].samples && duration <= data[type].duration); 430 data[type].samples -= samples; 431 data[type].duration -= duration; 432 foreach (i, lMy; data[type].logicalOffsets) 433 if (logicalOffsets.canFind(lMy)) 434 foreach_reverse (j; 0 .. i + 1) 435 data[type].logicalOffsets = j == 0 ? -1 : data[type].logicalOffsets[j + 1]; 436 if (parent) 437 parent.removeSamples(type, samples, logicalOffsets, duration); 438 } 439 440 void addDistributedSample(double share) 441 { 442 distributedSamples += share; 443 if (parent) 444 parent.addDistributedSample(share); 445 } 446 447 void removeDistributedSample(double share) 448 { 449 addDistributedSample(-share); 450 } 451 452 /// Other paths this address is reachable via, 453 /// and samples seen from those addresses 454 HashMap!(GlobalPath, size_t, CasualAllocator, generateHash!GlobalPath, false, false) seenAs; 455 456 /// Serialized representation 457 struct SerializedForm 458 { 459 string name; 460 461 struct SampleData 462 { 463 // Same order as SampleType 464 @JSONOptional Data represented; 465 @JSONOptional Data exclusive; 466 @JSONName("shared") 467 @JSONOptional Data shared_; 468 @JSONOptional JSONFragment distributedSamples = JSONFragment("0"); 469 } 470 SampleData data; 471 472 BrowserPath*[] children; 473 } 474 475 SerializedForm toJSON() 476 { 477 SerializedForm s; 478 s.name = this.name[]; 479 for (auto p = firstChild; p; p = p.nextSibling) 480 s.children ~= p; 481 static foreach (sampleType; EnumMembers!SampleType) 482 s.data.tupleof[sampleType] = data[sampleType]; 483 if (this.distributedSamples !is 0.) 484 s.data.distributedSamples.json = this.distributedSamples.format!"%17e"; 485 return s; 486 } 487 488 static BrowserPath fromJSON(ref SerializedForm s) 489 { 490 import std.conv : to; 491 492 auto p = BrowserPath(null, NameString(s.name)); 493 foreach_reverse (child; s.children) 494 { 495 child.nextSibling = p.firstChild; 496 p.firstChild = child; 497 } 498 static foreach (sampleType; EnumMembers!SampleType) 499 p.data[sampleType] = s.data.tupleof[sampleType]; 500 p.distributedSamples = s.data.distributedSamples.json.strip.to!double; 501 return p; 502 } 503 504 void resetParents() 505 { 506 for (auto p = firstChild; p; p = p.nextSibling) 507 { 508 p.parent = &this; 509 p.resetParents(); 510 } 511 } 512 513 /// Approximate the effect of deleting the filesystem object represented by the path. 514 void remove() 515 { 516 assert(parent); 517 518 // Mark this subtree for deletion, to aid the rebalancing below. 519 markForDeletion(); 520 521 // Rebalance the hierarchy's statistics by updating and moving sample data as needed. 522 evict(); 523 524 // Unlink this node, removing it from the tree. 525 { 526 auto pp = parent.find(this.name[]); 527 assert(*pp == &this); 528 *pp = this.nextSibling; 529 } 530 } 531 532 // Mark this subtree for deletion, to aid the rebalancing below. 533 private void markForDeletion() 534 { 535 deleting = true; 536 for (auto p = firstChild; p; p = p.nextSibling) 537 p.markForDeletion(); 538 } 539 540 /// Clear all samples or move them elsewhere. 541 private void evict() 542 { 543 assert(parent); 544 545 // Evict children first 546 for (auto p = firstChild; p; p = p.nextSibling) 547 p.evict(); 548 549 // Save this node's remaining stats before we remove them. 550 auto data = this.data; 551 auto distributedSamples = this.distributedSamples; 552 553 // Remove sample data from this node and its parents. 554 // After recursion, for non-leaf nodes, most of these should now be at zero (as far as we can estimate). 555 static foreach (sampleType; EnumMembers!SampleType) 556 if (data[sampleType].samples) // avoid quadratic complexity 557 removeSamples(sampleType, data[sampleType].samples, data[sampleType].logicalOffsets[], data[sampleType].duration); 558 if (distributedSamples) // avoid quadratic complexity 559 removeDistributedSample(distributedSamples); 560 561 if (seenAs.empty) 562 return; // Directory (non-leaf) node - nothing else to do here. 563 564 // For leaf nodes, some stats can be redistributed to other references. 565 // We need to do some path calculations first, 566 // such as inferring the GlobalPath from the BrowserPath and seenAs. 567 BrowserPath* root; 568 foreach (ref otherPath; seenAs.byKey) 569 { 570 root = this.unappendPath(&otherPath); 571 if (root) 572 break; 573 } 574 debug assert(root); 575 if (!root) 576 return; 577 578 // These paths will inherit the remains. 579 auto remainingPaths = seenAs.byKey 580 .filter!(otherPath => !root.appendPath(&otherPath).deleting) 581 .array; 582 583 // Redistribute to siblings 584 if (!remainingPaths.empty) 585 { 586 auto newRepresentativePath = selectRepresentativePath(remainingPaths); 587 foreach (ref remainingPath; remainingPaths) 588 { 589 // Redistribute samples 590 if (remainingPath == newRepresentativePath) 591 root.appendPath(&remainingPath).addSamples( 592 SampleType.represented, 593 data[SampleType.represented].samples, 594 data[SampleType.represented].logicalOffsets[], 595 data[SampleType.represented].duration, 596 ); 597 if (distributedSamples) 598 root.appendPath(&remainingPath).addDistributedSample(distributedSamples / remainingPaths.length); 599 } 600 } 601 } 602 } 603 604 GlobalPath selectRepresentativePath(GlobalPath[] paths) 605 { 606 return paths.fold!((a, b) { 607 // Prefer paths with resolved roots 608 auto aResolved = a.isResolved(); 609 auto bResolved = b.isResolved(); 610 if (aResolved != bResolved) 611 return aResolved ? a : b; 612 // Shortest path always wins 613 auto aLength = a.length; 614 auto bLength = b.length; 615 if (aLength != bLength) 616 return aLength < bLength ? a : b; 617 // If the length is the same, pick the lexicographically smallest one 618 return a < b ? a : b; 619 })(); 620 } 621 622 private bool isResolved(ref GlobalPath p) 623 { 624 return !p.range 625 .map!(g => g.range) 626 .joiner 627 .canFind!(n => n.startsWith("\0TREE_")); 628 } 629 630 // We prefix "special" names with one NUL character to 631 // distinguish them from filesystem entries. 632 bool skipOverNul(C)(ref C[] str) 633 { 634 // Workaround for https://issues.dlang.org/show_bug.cgi?id=22302 635 if (str.startsWith("\0")) 636 { 637 str = str[1 .. $]; 638 return true; 639 } 640 return false; 641 } 642 643 /// Inline string type. 644 alias InlineString(size_t size) = InlineArr!(immutable(char), size); 645 646 union InlineArr(T, size_t size) 647 { 648 private: 649 static assert(size * T.sizeof > T[].sizeof); 650 alias InlineSize = ubyte; 651 static assert(size < InlineSize.max); 652 653 T[] str; 654 struct 655 { 656 T[size] inlineBuf; 657 InlineSize inlineLength; 658 } 659 660 public: 661 this(in Unqual!T[] s) 662 { 663 if (s.length > size) 664 str = growAllocator.makeArray!T(s[]); 665 else 666 { 667 inlineBuf[0 .. s.length] = s; 668 inlineLength = cast(InlineSize)s.length; 669 } 670 } 671 672 inout(T)[] opSlice() inout 673 { 674 if (inlineLength) 675 return inlineBuf[0 .. inlineLength]; 676 else 677 return str; 678 } 679 680 bool opCast(T : bool)() const { return this !is typeof(this).init; } 681 682 bool opEquals(ref const InlineArr other) const 683 { 684 return this[] == other[]; 685 } 686 }