From 86dd8e6b53f110968f592523afd1efdb9bc5b5f0 Mon Sep 17 00:00:00 2001 From: Stanislaw Adaszewski Date: Mon, 10 Feb 2020 12:51:29 +0100 Subject: [PATCH] Manifest parsing in a web worker seems to work well. --- frontend/package.json | 4 +- frontend/rollup.config.js | 1 + .../src/js/component/wb-collection-content.js | 190 ++++++++++++---- frontend/src/js/misc/wb-manifest-worker.js | 215 +++++++----------- 4 files changed, 234 insertions(+), 176 deletions(-) diff --git a/frontend/package.json b/frontend/package.json index 3707664..afc78ce 100755 --- a/frontend/package.json +++ b/frontend/package.json @@ -17,7 +17,9 @@ "rollup-plugin-license": "^0.7.0", "rollup-plugin-minify": "^1.0.3", "rollup-plugin-node-resolve": "^3.3.0", - "watch": "^1.0.2" + "streamsaver": "^2.0.3", + "watch": "^1.0.2", + "web-streams-polyfill": "^2.0.6" }, "scripts": { "rollup": "rollup -c", diff --git a/frontend/rollup.config.js b/frontend/rollup.config.js index 1b9adb6..84d35a6 100755 --- a/frontend/rollup.config.js +++ b/frontend/rollup.config.js @@ -45,6 +45,7 @@ export default { 'node_modules/filesize/lib/filesize.js': 'dist/js/filesize.js', 'node_modules/crypto-js/core.js': 'dist/js/crypto-js/core.js', 'node_modules/crypto-js/md5.js': 'dist/js/crypto-js/md5.js', + 'src/js/misc/wb-manifest-worker.js': 'dist/js/wb-manifest-worker.js', verbose: true }), buble({jsx: 'h'}), diff --git a/frontend/src/js/component/wb-collection-content.js b/frontend/src/js/component/wb-collection-content.js index 4aa8cdb..3cb7a74 100644 --- a/frontend/src/js/component/wb-collection-content.js +++ b/frontend/src/js/component/wb-collection-content.js @@ -1,19 +1,28 @@ import { h, Component } from 'preact'; import WBTable from 'wb-table'; import WBBreadcrumbs from 'wb-breadcrumbs'; -import { WBManifestReader } from 'wb-collection-manifest'; -//import WBManifestReader from 'wb-manifest-reader'; +// import { WBManifestReader } from 'wb-collection-manifest'; +// import WBManifestReader from 'wb-manifest-reader'; import WBPagination from 'wb-pagination'; import makeArvadosRequest from 'make-arvados-request'; import wbDownloadFile from 'wb-download-file'; +function unescapeName(name) { + return name.replace(/(\\\\|\\[0-9]{3})/g, + (_, $1) => ($1 === '\\\\' ? '\\' : String.fromCharCode(parseInt($1.substr(1), 8)))); +} + class WBCollectionContent extends Component { constructor(...args) { super(...args); this.state.rows = []; - this.state.manifestReader = null; + this.state.manifestWorker = new Worker('/js/wb-manifest-worker.js'); + this.state.manifestWorker.onerror = (e => console.log(e)); this.state.loaded = 0; this.state.total = 0; + this.state.mode = 'manifestDownload'; + this.state.parsedStreams = 0; + this.state.totalStreams = 1; } getUrl(params) { @@ -26,7 +35,8 @@ class WBCollectionContent extends Component { componentDidMount() { let { arvHost, arvToken } = this.props.app.state; - let { uuid } = this.props; + let { uuid, collectionPath } = this.props; + let { manifestWorker } = this.state; let select = [ 'manifest_text' ]; let prom = makeArvadosRequest(arvHost, arvToken, @@ -36,25 +46,90 @@ class WBCollectionContent extends Component { this.setState({ 'loaded': e.loaded, 'total': e.total }); } }); prom = prom.then(xhr => { - this.state.manifestReader = new WBManifestReader(xhr.response.manifest_text); - this.prepareRows(); + const streams = xhr.response.manifest_text.split('\n'); + const paths = streams.filter(s => s).map(s => { + const n = s.indexOf(' '); + return unescapeName(s.substr(0, n)); + }); + + let prom_1 = new Promise(accept => accept()); + + prom_1 = prom_1.then(() => { + const prom_2 = new Promise(accept => { + manifestWorker.onmessage = () => accept(); + manifestWorker.postMessage([ 'precreatePaths', paths ]); + this.setState({ + 'totalStreams': streams.length, + 'parsedStreams': 0, + 'mode': 'manifestParse' + }) + }) + return prom_2; + }); + + for (let i = 0; i < streams.length; i++) { + + prom_1 = prom_1.then(() => { + const prom_2 = new Promise(accept => { + manifestWorker.onmessage = () => accept(); + manifestWorker.postMessage([ 'parseStream', streams[i] ]); + }); + + return prom_2; + }); + + prom_1 = prom_1.then(() => { + const prom_2 = new Promise(accept => { + manifestWorker.onmessage = (e) => accept(e); + manifestWorker.postMessage([ 'listDirectory', '.' + this.props.collectionPath, true ]); + if (i % 1000 === 0) + console.log(i + '/' + streams.length); + }); + return prom_2; + }); + + prom_1 = prom_1.then(e => { + this.prepareRows(e.data[1]); + this.setState({ 'parsedStreams': (i + 1) }); + }); + } + + prom_1 = prom_1.then(() => this.setState({ + 'mode': 'browsingReady' + })); + + return prom_1; }); } componentWillReceiveProps(nextProps) { + const { manifestWorker, mode } = this.state; + const { collectionPath } = nextProps; + if (mode === 'browsingReady') { + this.state.mode = 'waitForListing'; + let prom = new Promise(accept => { + manifestWorker.onmessage = (e) => accept(e); + manifestWorker.postMessage([ 'listDirectory', '.' + collectionPath ]); + }); + + prom = prom.then(e => { + this.state.mode = 'browsingReady'; + this.prepareRows(e.data[1]); + }); + } this.props = nextProps; - this.prepareRows(); + // this.prepareRows(); } - prepareRows() { - let { manifestReader } = this.state; + prepareRows(listing) { + let { manifestReader, mode } = this.state; let { collectionPath, page, itemsPerPage } = this.props; let { arvHost, arvToken } = this.props.app.state; //path = path.split('/'); //path = [ '.' ].concat(path); - let listing = manifestReader.listDirectory('.' + collectionPath) + //let listing = manifestReader.listDirectory('.' + collectionPath) const numPages = Math.ceil(listing.length / itemsPerPage); listing = listing.slice(page * itemsPerPage, page * itemsPerPage + itemsPerPage); @@ -71,52 +146,73 @@ class WBCollectionContent extends Component { item[1], 'File', filesize(item[2]), - (
- - - -
) + ( (mode === 'browsingReady') ? ( +
+ + + +
+ ) : null) ] )) }); } - render({ collectionPath, page }, { manifestReader, rows, numPages, loaded, total }) { + render({ collectionPath, page }, { manifestReader, rows, + numPages, loaded, total, mode, parsedStreams, totalStreams }) { + return (
- { manifestReader ? ( -
- - - this.getUrl({ 'page': page }) } /> -
- ) : ( -
Downloading manifest: { filesize(loaded) }
- ) } - + { (mode === 'manifestDownload') ? + ( +
+
Downloading manifest: { filesize(loaded) }
+
+
+
+
+ + ) : ( +
+ { mode === 'manifestParse' ? ( +
+
Parsing manifest: { parsedStreams }/{ totalStreams }
+
+
+
+
+ ) : null } + + + + this.getUrl({ 'page': page }) } /> +
+ ) }
); } diff --git a/frontend/src/js/misc/wb-manifest-worker.js b/frontend/src/js/misc/wb-manifest-worker.js index cc7fbd7..e54e8f2 100644 --- a/frontend/src/js/misc/wb-manifest-worker.js +++ b/frontend/src/js/misc/wb-manifest-worker.js @@ -1,3 +1,73 @@ +const rx = /^[a-f0-9]{32}\+[0-9]+/; +const rootDir = {}; +const streams = []; + +onmessage = function(e) { + switch (e.data[0]) { + case 'precreatePaths': + precreatePaths(e.data[1]); + postMessage([ 'precreatePathsResult' ]); + break; + case 'parseStream': + parseStream(e.data[1]); + postMessage([ 'parseStreamResult' ]); + break; + case 'listDirectory': { + const lst = listDirectory(rootDir, e.data[1], e.data[2]); + postMessage([ 'listDirectoryResult', lst ]) + break; } + default: + throw Error('Unknown verb: ' + e.data[0]); + } +} + +function precreatePaths(paths) { + for (let i = 0; i < paths.length; i++) { + mkpath(rootDir, paths[i]); + } +} + +function parseStream(s) { + if (!s) return; + + const tokens = s.split(' '); + const streamName = unescapeName(tokens[0]); + + let n = tokens.map(t => rx.exec(t)); + n = n.indexOf(null, 1); + + let locators = tokens.slice(1, n); + let pos = 0; + locators = locators.map(loc => { + const sz = parseInt(loc.split('+')[1], 10); + return [ loc, pos, pos += sz ]; + }); + + let fileTokens = tokens.slice(n); + let lastFile = null; + let lastPath = null; + fileTokens.map(t => { + let seg = t.split(':'); + seg = [ parseInt(seg[0], 10), parseInt(seg[1], 10), + unescapeName(seg.slice(2).join(':')) ] + const path = streamName + '/' + seg[2]; + let f; + if (path === lastPath) { + f = lastFile; + } else { + let dirName = path.split('/'); + const name = dirName[dirName.length - 1]; + dirName = dirName.slice(0, dirName.length - 1); + const d = mkpath(rootDir, dirName); + lastFile = f = makeFile(d, name); + lastPath = path; + } + appendFile(f, streams.length, seg); + }); + + streams.push(locators); +} + function mkdir(parent, name) { if (name in parent && (parent[name] instanceof Array)) throw Error('File with the same name already exists'); @@ -31,7 +101,7 @@ function makeFile(dir, name) { function appendFile(f, sidx, seg) { f[0].push([ sidx, seg[0], seg[1] ]); - //f[1] += seg[1]; + f[1] += seg[1]; return f; } @@ -40,142 +110,31 @@ function unescapeName(name) { (_, $1) => ($1 === '\\\\' ? '\\' : String.fromCharCode(parseInt($1.substr(1), 8)))); } -function process(streams) { - const rootDir = {}; - - streams.map((s, sidx) => { - const [ streamName, locators, segments ] = s; - const streamDir = mkpath(rootDir, streamName); - segments.map((seg, segidx) => { - let name = seg[2].split('/'); - const dir = (name.length === 1 ? streamDir : - mkpath(streamDir, ['.'].concat(name.slice(0, name.length - 1)))); - name = name[name.length - 1]; - appendFile(dir, name, sidx, seg); - }); - }); - - return rootDir; -} - -function parse(manifestText) { - const M_STREAM_NAME = 0; - const M_LOCATORS = 1; - const M_FILE_SEGMENTS = 2; - - let mode = M_STREAM_NAME; - - const streams = []; - let locators = []; - - let streamName; - let accum = ''; - let tokenStart = 0; - - let lastFile = null; - let lastPath = null; - const rootDir = {}; - - for (let i = 0; i < manifestText.length; i++) { - const c = manifestText[i]; - - if (mode === M_STREAM_NAME) { - if (c === ' ') { - mode = M_LOCATORS; - streamName = unescapeName(accum); - accum = ''; - tokenStart = i + 1; - } else { - accum += c; - } - - } else if (mode === M_LOCATORS) { - if (c === ':') { - mode = M_FILE_SEGMENTS; - accum = ''; - i = tokenStart - 1; - let pos = 0; - locators = locators.map(loc => { - const r = loc.concat([ pos, pos + loc[1] ]); - pos += loc[1]; - return r; - }); - } else if (c === ' ') { - const sz = Number(accum.split('+')[1]); - locators.push([accum, sz]); - accum = ''; - tokenStart = i + 1; - } else { - accum += c; - } - - } else if (mode === M_FILE_SEGMENTS) { - if (c === ' ' || c === '\n') { - let seg = accum.split(':'); - seg = [Number(seg[0]), Number(seg[1]), seg.slice(2).join(':')]; - const path = streamName + '/' + unescapeName(seg[2]); - let f; - if (path !== lastPath) { - let dirName = path.split('/'); - const fileName = dirName[dirName.length - 1]; - dirName = dirName.slice(0, dirName.length - 1); - const dir = mkpath(rootDir, dirName); - f = makeFile(dir, fileName); - lastPath = path; - lastFile = f; - } else { - f = lastFile; - } - appendFile(f, streams.length, seg); - accum = ''; - tokenStart = i + 1; - if (c === '\n') { - streams.push([ streamName, locators ]); - locators = []; - mode = M_STREAM_NAME; - } - } else { - accum += c; - } - - } - } - - return { rootDir, streams }; -} - -function findDir(parent, path) { +function findDir(parent, path, lenient=false) { if (typeof(path) === 'string') path = path.split('/'); if (path[0] !== '.') throw Error('Path must start with a dot (.)'); let dir = parent; for (let i = 1; i < path.length; i++) { - if (!(path[i] in dir)) - throw Error('Directory not found'); + if (!(path[i] in dir)) { + if (lenient) + return {}; + else + throw Error('Directory not found'); + } dir = dir[path[i]]; } return dir; } -class WBManifestReader { - constructor(manifestText) { - const {rootDir, streams} = parse(manifestText); - this.rootDir = rootDir; - this.streams = streams; - //this.rootDir = process(this.streams); - } - - listDirectory(path) { - let dir = findDir(this.rootDir, path); - let keys = Object.keys(dir); - keys.sort(); - let subdirs = keys.filter(k => !(dir[k] instanceof Array)); - let files = keys.filter(k => (dir[k] instanceof Array)); - let res = subdirs.map(k => [ 'd', k, null ]); - res = res.concat(files.map(k => [ 'f', k, dir[k][1] ])); - return res; - } +function listDirectory(rootDir, path, lenient=false) { + let dir = findDir(rootDir, path, lenient); + let keys = Object.keys(dir); + keys.sort(); + let subdirs = keys.filter(k => !(dir[k] instanceof Array)); + let files = keys.filter(k => (dir[k] instanceof Array)); + let res = subdirs.map(k => [ 'd', k, null ]); + res = res.concat(files.map(k => [ 'f', k, dir[k][1] ])); + return res; } - -export default WBManifestReader;