You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

541 lines
17 KiB

3 years ago
  1. <?php
  2. /**
  3. * @copyright Copyright (c) 2016, ownCloud, Inc.
  4. *
  5. * @author Ari Selseng <ari@selseng.net>
  6. * @author Arthur Schiwon <blizzz@arthur-schiwon.de>
  7. * @author Björn Schießle <bjoern@schiessle.org>
  8. * @author Christoph Wurst <christoph@winzerhof-wurst.at>
  9. * @author Daniel Jagszent <daniel@jagszent.de>
  10. * @author Joas Schilling <coding@schilljs.com>
  11. * @author Jörn Friedrich Dreyer <jfd@butonic.de>
  12. * @author Lukas Reschke <lukas@statuscode.ch>
  13. * @author Martin Mattel <martin.mattel@diemattels.at>
  14. * @author Morris Jobke <hey@morrisjobke.de>
  15. * @author Owen Winkler <a_github@midnightcircus.com>
  16. * @author Robin Appelman <robin@icewind.nl>
  17. * @author Robin McCorkell <robin@mccorkell.me.uk>
  18. * @author Thomas Müller <thomas.mueller@tmit.eu>
  19. * @author Vincent Petry <pvince81@owncloud.com>
  20. *
  21. * @license AGPL-3.0
  22. *
  23. * This code is free software: you can redistribute it and/or modify
  24. * it under the terms of the GNU Affero General Public License, version 3,
  25. * as published by the Free Software Foundation.
  26. *
  27. * This program is distributed in the hope that it will be useful,
  28. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  29. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  30. * GNU Affero General Public License for more details.
  31. *
  32. * You should have received a copy of the GNU Affero General Public License, version 3,
  33. * along with this program. If not, see <http://www.gnu.org/licenses/>
  34. *
  35. */
  36. namespace OC\Files\Cache;
  37. use OC\Files\Filesystem;
  38. use OC\Hooks\BasicEmitter;
  39. use OCP\Files\Cache\IScanner;
  40. use OCP\Files\ForbiddenException;
  41. use OCP\ILogger;
  42. use OCP\Lock\ILockingProvider;
  43. /**
  44. * Class Scanner
  45. *
  46. * Hooks available in scope \OC\Files\Cache\Scanner:
  47. * - scanFile(string $path, string $storageId)
  48. * - scanFolder(string $path, string $storageId)
  49. * - postScanFile(string $path, string $storageId)
  50. * - postScanFolder(string $path, string $storageId)
  51. *
  52. * @package OC\Files\Cache
  53. */
  54. class Scanner extends BasicEmitter implements IScanner {
  /**
   * Storage backend that this scanner reads metadata from.
   *
   * @var \OC\Files\Storage\Storage
   */
  protected $storage;

  /**
   * Identifier of the storage, as returned by its getId() method.
   *
   * @var string
   */
  protected $storageId;

  /**
   * File cache obtained from the storage.
   *
   * @var \OC\Files\Cache\Cache
   */
  protected $cache;

  /**
   * When true, scan results are written to the cache; when false the
   * cache is left untouched.
   *
   * @var boolean
   */
  protected $cacheActive;

  /**
   * Whether the scanning of a folder is wrapped in a database transaction.
   *
   * @var bool
   */
  protected $useTransactions = true;

  /**
   * Locking provider handed to the storage when locks are acquired or released.
   *
   * @var \OCP\Lock\ILockingProvider
   */
  protected $lockingProvider;
  /**
   * Bind the scanner to a storage and fetch its collaborators.
   *
   * The cache is considered writable unless the system value
   * 'filesystem_cache_readonly' is set to a truthy value.
   *
   * @param \OC\Files\Storage\Storage $storage the storage to scan
   */
  public function __construct(\OC\Files\Storage\Storage $storage) {
      $this->storage = $storage;
      $this->storageId = $this->storage->getId();
      $this->cache = $storage->getCache();

      $cacheIsReadOnly = \OC::$server->getConfig()->getSystemValue('filesystem_cache_readonly', false);
      $this->cacheActive = !$cacheIsReadOnly;

      $this->lockingProvider = \OC::$server->getLockingProvider();
  }
  /**
   * Choose whether the scan of a folder is wrapped in a database transaction.
   * Transactions are used by default.
   *
   * @param bool $useTransactions true to wrap folder scans in a transaction
   */
  public function setUseTransactions($useTransactions) {
      $this->useTransactions = $useTransactions;
  }
  /**
   * Fetch the metadata of a file or folder from the storage.
   *
   * A debug message is logged when the storage returns null for the path.
   *
   * @param string $path path inside the storage
   * @return array|null the metadata reported by the storage, or null if it reported none
   */
  protected function getData($path) {
      $metadata = $this->storage->getMetaData($path);

      if ($metadata === null) {
          \OCP\Util::writeLog(Scanner::class, "!!! Path '$path' is not accessible or present !!!", ILogger::DEBUG);
      }

      return $metadata;
  }
  /**
   * Scan a single file or folder entry and store it in the cache.
   *
   * Nothing is scanned (and null is returned) when the storage rejects the
   * path, when the path is a partial upload or a blacklisted file, when
   * reading the metadata is forbidden, or when the parent entry cannot be
   * scanned. When the storage has no metadata for the path, the path is
   * removed from the cache.
   *
   * If a shared lock is taken it is always released before this method
   * returns or throws.
   *
   * @param string $file path inside the storage, '' for the root
   * @param int $reuseExisting bitmask of self::REUSE_* flags; 0 disables reuse of cached values
   * @param int $parentId id of the parent folder, -1 to look it up in the cache
   * @param array|null|false $cacheData existing data in the cache for the file to be scanned,
   *                                    null to fetch it from the cache
   * @param bool $lock set to false to disable getting an additional read lock during scanning
   * @param array|null $data the metadata for the file, as returned by the storage; null to fetch it
   * @return array|null metadata of the scanned entry, a falsy value when the storage
   *                    has no metadata for it, or null when the entry was skipped
   * @throws \OCP\Lock\LockedException
   */
  public function scanFile($file, $reuseExisting = 0, $parentId = -1, $cacheData = null, $lock = true, $data = null) {
      if ($file !== '') {
          try {
              $this->storage->verifyPath(dirname($file), basename($file));
          } catch (\Exception $e) {
              // paths the storage refuses are skipped silently
              return null;
          }
      }

      // only proceed if $file is not a partial file nor a blacklisted file
      if (self::isPartialFile($file) || Filesystem::isFileBlacklisted($file)) {
          return null;
      }

      $locked = $lock && $this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage');
      if ($locked) {
          $this->storage->acquireLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
      }

      // Everything below runs under try/finally so that the shared lock is
      // released on every exit path: early returns as well as exceptions.
      try {
          try {
              $data = $data ?? $this->getData($file);
          } catch (ForbiddenException $e) {
              return null;
          }

          if (!$data) {
              // the storage has no metadata for the path: drop it from the cache
              $this->removeFromCache($file);
              return $data;
          }

          // hooks are only emitted for files. By that we avoid counting/treating folders as files
          $isFile = $data['mimetype'] !== 'httpd/unix-directory';
          if ($isFile) {
              $this->emit('\OC\Files\Cache\Scanner', 'scanFile', [$file, $this->storageId]);
              \OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', ['path' => $file, 'storage' => $this->storageId]);
          }

          $parent = dirname($file);
          if ($parent === '.' || $parent === '/') {
              $parent = '';
          }
          if ($parentId === -1) {
              $parentId = $this->cache->getParentId($file);
          }

          // scan the parent if it's not in the cache (id -1) and the current file is not the root folder.
          // Compared against '' explicitly: an entry named "0" is falsy in PHP but is not the root.
          if ($file !== '' && $parentId === -1) {
              $parentData = $this->scanFile($parent);
              if (!$parentData) {
                  return null;
              }
              $parentId = $parentData['fileid'];
          }
          if ($parent !== '') {
              $data['parent'] = $parentId;
          }

          if (is_null($cacheData)) {
              /** @var CacheEntry $cacheData */
              $cacheData = $this->cache->get($file);
          }

          if ($cacheData && $reuseExisting && isset($cacheData['fileid'])) {
              // prevent empty etag
              $etag = empty($cacheData['etag']) ? $data['etag'] : $cacheData['etag'];
              $fileId = $cacheData['fileid'];
              $data['fileid'] = $fileId;

              // only reuse data if the file hasn't explicitly changed
              if (isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']) {
                  $data['mtime'] = $cacheData['mtime'];
                  if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) {
                      $data['size'] = $cacheData['size'];
                  }
                  if ($reuseExisting & self::REUSE_ETAG) {
                      $data['etag'] = $etag;
                  }
              }

              // Only update metadata that has changed. Callers may hand in the
              // cached values either as a plain array or as a cache entry object.
              $cachedValues = is_array($cacheData) ? $cacheData : $cacheData->getData();
              $newData = array_diff_assoc($data, $cachedValues);
          } else {
              $newData = $data;
              $fileId = -1;
          }

          if (!empty($newData)) {
              // Reset the checksum if the data has changed
              $newData['checksum'] = '';
              $newData['parent'] = $parentId;
              $data['fileid'] = $this->addToCache($file, $newData, $fileId);
          }

          if ($cacheData && isset($cacheData['size'])) {
              $data['oldSize'] = $cacheData['size'];
          } else {
              $data['oldSize'] = 0;
          }

          if ($cacheData && isset($cacheData['encrypted'])) {
              $data['encrypted'] = $cacheData['encrypted'];
          }

          if ($isFile) {
              $this->emit('\OC\Files\Cache\Scanner', 'postScanFile', [$file, $this->storageId]);
              \OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', ['path' => $file, 'storage' => $this->storageId]);
          }

          if (!isset($data['encrypted'])) {
              $data['encrypted'] = false;
          }

          return $data;
      } finally {
          if ($locked) {
              $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
          }
      }
  }
  /**
   * Announce the removal of a path and, if the cache is active, remove it from the cache.
   *
   * The 'removeFromCache' hooks are emitted even when the cache is inactive.
   *
   * @param string $path path inside the storage
   */
  protected function removeFromCache($path) {
      \OC_Hook::emit('Scanner', 'removeFromCache', ['file' => $path]);
      $this->emit('\OC\Files\Cache\Scanner', 'removeFromCache', [$path]);

      if (!$this->cacheActive) {
          return;
      }

      $this->cache->remove($path);
  }
  /**
   * Announce new metadata for a path and write it to the cache if the cache is active.
   *
   * A 'scan_permissions' value in $data, when present, overrides 'permissions'
   * before the hooks are emitted and the data is stored.
   *
   * @param string $path path inside the storage
   * @param array $data metadata to store
   * @param int $fileId id of the existing cache entry to update, -1 to insert a new one
   * @return int the id of the added file, or -1 when the cache is inactive
   */
  protected function addToCache($path, $data, $fileId = -1) {
      if (isset($data['scan_permissions'])) {
          $data['permissions'] = $data['scan_permissions'];
      }

      \OC_Hook::emit('Scanner', 'addToCache', ['file' => $path, 'data' => $data]);
      $this->emit('\OC\Files\Cache\Scanner', 'addToCache', [$path, $this->storageId, $data]);

      if (!$this->cacheActive) {
          return -1;
      }

      if ($fileId === -1) {
          return $this->cache->insert($path, $data);
      }

      $this->cache->update($fileId, $data);
      return $fileId;
  }
  /**
   * Announce updated metadata for a path and write it to the cache if the cache is active.
   *
   * NOTE(review): the legacy OC_Hook signal emitted here is named 'addToCache'
   * while the emitter signal is 'updateCache' - confirm whether that is intentional.
   *
   * @param string $path path inside the storage
   * @param array $data metadata to store
   * @param int $fileId id of the cache entry to update, -1 to store the data by path
   */
  protected function updateCache($path, $data, $fileId = -1) {
      \OC_Hook::emit('Scanner', 'addToCache', ['file' => $path, 'data' => $data]);
      $this->emit('\OC\Files\Cache\Scanner', 'updateCache', [$path, $this->storageId, $data]);

      if (!$this->cacheActive) {
          return;
      }

      if ($fileId === -1) {
          $this->cache->put($path, $data);
      } else {
          $this->cache->update($fileId, $data);
      }
  }
  /**
   * Scan a file or a folder and all its children.
   *
   * When locking is enabled and the storage supports it, an exclusive
   * 'scanner::<path>' lock and a shared lock on the path are held for the
   * duration of the scan. Each lock that was acquired is released again,
   * even if acquiring the second lock or the scan itself throws.
   *
   * @param string $path path inside the storage
   * @param bool $recursive one of the self::SCAN_* modes
   * @param int $reuse bitmask of self::REUSE_* flags, -1 to pick the default for the scan mode
   * @param bool $lock set to false to disable getting an additional read lock during scanning
   * @return array|null the meta data of the scanned file or folder, as returned by scanFile()
   */
  public function scan($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $lock = true) {
      if ($reuse === -1) {
          $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
      }

      $locked = $lock && $this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage');
      $data = null;

      if ($locked) {
          $this->storage->acquireLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
      }
      try {
          // acquired inside the outer try: if this throws, the exclusive lock above is still released
          if ($locked) {
              $this->storage->acquireLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
          }
          try {
              $data = $this->scanFile($path, $reuse, -1, null, $lock);
              if ($data and $data['mimetype'] === 'httpd/unix-directory') {
                  $size = $this->scanChildren($path, $recursive, $reuse, $data['fileid'], $lock);
                  $data['size'] = $size;
              }
          } finally {
              if ($locked) {
                  $this->storage->releaseLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
              }
          }
      } finally {
          if ($locked) {
              $this->storage->releaseLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
          }
      }

      return $data;
  }
  /**
   * Collect the children of a folder that are currently in the cache,
   * indexed by their 'name' value.
   *
   * @param int $folderId id of the folder in the cache
   * @return array[] cached child entries keyed by name
   */
  protected function getExistingChildren($folderId) {
      $byName = [];

      foreach ($this->cache->getFolderContentsById($folderId) as $entry) {
          $byName[$entry['name']] = $entry;
      }

      return $byName;
  }
  /**
   * Scan all the files and folders in a folder.
   *
   * Direct children are handled first; sub folders that were queued for
   * scanning are then scanned recursively and their sizes are added up.
   * The resulting size is written to the folder's cache entry when the
   * cache is active.
   *
   * @param string $path path of the folder inside the storage
   * @param bool $recursive one of the self::SCAN_* modes
   * @param int $reuse bitmask of self::REUSE_* flags, -1 to pick the default for the scan mode
   * @param int $folderId id for the folder to be scanned; when null or -1 it is looked up in the cache
   * @param bool $lock set to false to disable getting an additional read lock during scanning
   * @return int the size of the scanned folder or -1 if the size is unknown at this stage
   */
  protected function scanChildren($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $folderId = null, $lock = true) {
      if ($reuse === -1) {
          $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
      }
      $this->emit('\OC\Files\Cache\Scanner', 'scanFolder', [$path, $this->storageId]);
      $size = 0;

      // Only ask the cache when the caller did not supply a usable id. -1 is
      // what addToCache() returns while the cache is inactive, so it is
      // treated as "unknown" as well.
      if ($folderId === null || $folderId === -1) {
          $folderId = $this->cache->getId($path);
      }

      $childQueue = $this->handleChildren($path, $recursive, $reuse, $folderId, $lock, $size);

      foreach ($childQueue as $child => $childId) {
          $childSize = $this->scanChildren($child, $recursive, $reuse, $childId, $lock);
          if ($childSize === -1) {
              // one unknown child size makes the whole folder size unknown
              $size = -1;
          } elseif ($size !== -1) {
              $size += $childSize;
          }
      }

      if ($this->cacheActive) {
          $this->cache->update($folderId, ['size' => $size]);
      }
      $this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', [$path, $this->storageId]);
      return $size;
  }
  /**
   * Scan the direct children of a folder and sync the cache with the storage listing.
   *
   * Every entry listed by the storage is passed to scanFile(). Cached children
   * that the storage no longer lists are removed from the cache. When
   * transactions are enabled the work is wrapped in a database transaction.
   *
   * @param string $path path of the folder inside the storage
   * @param bool $recursive one of the self::SCAN_* modes
   * @param int $reuse bitmask of self::REUSE_* flags
   * @param int $folderId id of the folder in the cache
   * @param bool $lock passed through to scanFile()
   * @param int $size running size of the folder, updated in place; set to -1 once a child size is unknown
   * @return array map of child folder path => file id for the folders that still have to be scanned
   * @throws \OCP\Lock\LockedException
   */
  private function handleChildren($path, $recursive, $reuse, $folderId, $lock, &$size) {
      // we put this in its own function so it cleans up the memory before we start recursing
      $cachedByName = $this->getExistingChildren($folderId);
      $storageEntries = iterator_to_array($this->storage->getDirectoryContent($path));

      if ($this->useTransactions) {
          \OC::$server->getDatabaseConnection()->beginTransaction();
      }

      $dbErrorSeen = false;
      $foldersToScan = [];
      $listedNames = [];

      foreach ($storageEntries as $entryMeta) {
          $name = $entryMeta['name'];
          $listedNames[] = $name;

          if ($path) {
              $childPath = $path . '/' . $name;
          } else {
              $childPath = $name;
          }

          try {
              $cachedEntry = $cachedByName[$name] ?? false;
              $scanned = $this->scanFile($childPath, $reuse, $folderId, $cachedEntry, $lock, $entryMeta);
              if (!$scanned) {
                  continue;
              }

              $isFolder = $scanned['mimetype'] === 'httpd/unix-directory';
              // in "incomplete" mode only recurse into folders which aren't fully scanned
              $mustDescend = $isFolder && (
                  $recursive === self::SCAN_RECURSIVE
                  || ($recursive === self::SCAN_RECURSIVE_INCOMPLETE && $scanned['size'] === -1)
              );

              if ($mustDescend) {
                  $foldersToScan[$childPath] = $scanned['fileid'];
              } elseif ($scanned['size'] === -1) {
                  $size = -1;
              } elseif ($size !== -1) {
                  $size += $scanned['size'];
              }
          } catch (\Doctrine\DBAL\DBALException $dbError) {
              // might happen if inserting duplicate while a scanning
              // process is running in parallel
              // log and ignore
              if ($this->useTransactions) {
                  \OC::$server->getDatabaseConnection()->rollback();
                  \OC::$server->getDatabaseConnection()->beginTransaction();
              }
              \OC::$server->getLogger()->logException($dbError, [
                  'message' => 'Exception while scanning file "' . $childPath . '"',
                  'level' => ILogger::DEBUG,
                  'app' => 'core',
              ]);
              $dbErrorSeen = true;
          } catch (\OCP\Lock\LockedException $lockError) {
              if ($this->useTransactions) {
                  \OC::$server->getDatabaseConnection()->rollback();
              }
              throw $lockError;
          }
      }

      // whatever is cached but no longer listed by the storage has to go
      $vanishedNames = \array_diff(array_keys($cachedByName), $listedNames);
      foreach ($vanishedNames as $vanishedName) {
          $vanishedPath = $path ? $path . '/' . $vanishedName : $vanishedName;
          $this->removeFromCache($vanishedPath);
      }

      if ($this->useTransactions) {
          \OC::$server->getDatabaseConnection()->commit();
      }

      if ($dbErrorSeen) {
          // It might happen that the parallel scan process has already
          // inserted mimetypes but those weren't available yet inside the transaction
          // To make sure to have the updated mime types in such cases,
          // we reload them here
          \OC::$server->getMimeTypeLoader()->reset();
      }

      return $foldersToScan;
  }
  /**
   * Check if the file should be ignored when scanning.
   *
   * A path is treated as partial when its extension is 'part' or when it
   * contains '.part/', i.e. lies below a directory whose name ends in '.part'.
   * This prevents unfinished put requests from being scanned.
   *
   * @param string $file path to check
   * @return boolean true if the path belongs to a partial upload
   */
  public static function isPartialFile($file) {
      $hasPartExtension = pathinfo($file, PATHINFO_EXTENSION) === 'part';

      return $hasPartExtension || strpos($file, '.part/') !== false;
  }
  /**
   * Walk over any folders that are not fully scanned yet and scan them.
   *
   * If the root is not in the cache, the whole storage is scanned recursively.
   * Otherwise incomplete folders reported by the cache are scanned one after
   * another until the cache reports none, or reports the same path twice in a row.
   */
  public function backgroundScan() {
      if (!$this->cache->inCache('')) {
          $this->runBackgroundScanJob(function () {
              $this->scan('', self::SCAN_RECURSIVE, self::REUSE_ETAG);
          }, '');
          return;
      }

      $previousPath = null;
      while (true) {
          $path = $this->cache->getIncomplete();
          if ($path === false || $path === $previousPath) {
              break;
          }

          $this->runBackgroundScanJob(function () use ($path) {
              $this->scan($path, self::SCAN_RECURSIVE_INCOMPLETE, self::REUSE_ETAG | self::REUSE_SIZE);
          }, $path);

          // FIXME: this won't proceed with the next item, needs revamping of getIncomplete()
          // to make this possible
          $previousPath = $path;
      }
  }
  /**
   * Run one background scan job and correct the folder size for its path afterwards.
   *
   * Exceptions signalling an invalid or unavailable storage, a forbidden
   * path or a locked path are swallowed so that the job is simply skipped.
   *
   * @param callable $callback performs the actual scan
   * @param string $path path that was scanned, used for the size correction
   */
  private function runBackgroundScanJob(callable $callback, $path) {
      try {
          $callback();

          \OC_Hook::emit('Scanner', 'correctFolderSize', ['path' => $path]);

          $canCorrectSize = $this->cacheActive && $this->cache instanceof Cache;
          if ($canCorrectSize) {
              $this->cache->correctFolderSize($path, null, true);
          }
      } catch (\OCP\Files\StorageInvalidException $invalidStorage) {
          // invalid storage: nothing to scan, skip it
      } catch (\OCP\Files\StorageNotAvailableException $unavailableStorage) {
          // storage currently unavailable: skip it
      } catch (\OCP\Files\ForbiddenException $forbidden) {
          // access forbidden: skip it
      } catch (\OCP\Lock\LockedException $locked) {
          // path is locked: skip it
      }
  }
  /**
   * Switch cache writes by scan operations on or off.
   *
   * @param boolean $active true to let scans modify the cache, false to leave it untouched
   */
  public function setCacheActive($active) {
      $this->cacheActive = $active;
  }
  502. }