IF YOU WOULD LIKE TO GET AN ACCOUNT, please write an email to s dot adaszewski at gmail dot com. User accounts are meant only to report issues and/or generate pull requests. This is a purpose-specific Git hosting for ADARED projects. Thank you for your understanding!
Browse Source

Manifest parsing in a web worker seems to work well.

pull/1/head
parent
commit
86dd8e6b53
4 changed files with 234 additions and 176 deletions
  1. +3
    -1
      frontend/package.json
  2. +1
    -0
      frontend/rollup.config.js
  3. +143
    -47
      frontend/src/js/component/wb-collection-content.js
  4. +87
    -128
      frontend/src/js/misc/wb-manifest-worker.js

+ 3
- 1
frontend/package.json View File

@@ -17,7 +17,9 @@
"rollup-plugin-license": "^0.7.0",
"rollup-plugin-minify": "^1.0.3",
"rollup-plugin-node-resolve": "^3.3.0",
"watch": "^1.0.2"
"streamsaver": "^2.0.3",
"watch": "^1.0.2",
"web-streams-polyfill": "^2.0.6"
},
"scripts": {
"rollup": "rollup -c",


+ 1
- 0
frontend/rollup.config.js View File

@@ -45,6 +45,7 @@ export default {
'node_modules/filesize/lib/filesize.js': 'dist/js/filesize.js',
'node_modules/crypto-js/core.js': 'dist/js/crypto-js/core.js',
'node_modules/crypto-js/md5.js': 'dist/js/crypto-js/md5.js',
'src/js/misc/wb-manifest-worker.js': 'dist/js/wb-manifest-worker.js',
verbose: true
}),
buble({jsx: 'h'}),


+ 143
- 47
frontend/src/js/component/wb-collection-content.js View File

@@ -1,19 +1,28 @@
import { h, Component } from 'preact';
import WBTable from 'wb-table';
import WBBreadcrumbs from 'wb-breadcrumbs';
import { WBManifestReader } from 'wb-collection-manifest';
//import WBManifestReader from 'wb-manifest-reader';
// import { WBManifestReader } from 'wb-collection-manifest';
// import WBManifestReader from 'wb-manifest-reader';
import WBPagination from 'wb-pagination';
import makeArvadosRequest from 'make-arvados-request';
import wbDownloadFile from 'wb-download-file';
/**
 * Decode the backslash escapes found in manifest names.
 *
 * Two escape forms are recognised: a doubled backslash, which becomes a
 * single backslash, and a backslash followed by three digits, which is read
 * as an octal character code.
 *
 * @param {string} name - escaped name as it appears in the manifest text
 * @returns {string} the name with every recognised escape replaced
 */
function unescapeName(name) {
  const escapePattern = /(\\\\|\\[0-9]{3})/g;
  return name.replace(escapePattern, (match, token) => {
    if (token === '\\\\') {
      return '\\';
    }
    // Drop the leading backslash and interpret the remaining digits as octal.
    const octalDigits = token.slice(1);
    return String.fromCharCode(parseInt(octalDigits, 8));
  });
}
class WBCollectionContent extends Component {
constructor(...args) {
super(...args);
this.state.rows = [];
this.state.manifestReader = null;
this.state.manifestWorker = new Worker('/js/wb-manifest-worker.js');
this.state.manifestWorker.onerror = (e => console.log(e));
this.state.loaded = 0;
this.state.total = 0;
this.state.mode = 'manifestDownload';
this.state.parsedStreams = 0;
this.state.totalStreams = 1;
}
getUrl(params) {
@@ -26,7 +35,8 @@ class WBCollectionContent extends Component {
componentDidMount() {
let { arvHost, arvToken } = this.props.app.state;
let { uuid } = this.props;
let { uuid, collectionPath } = this.props;
let { manifestWorker } = this.state;
let select = [ 'manifest_text' ];
let prom = makeArvadosRequest(arvHost, arvToken,
@@ -36,25 +46,90 @@ class WBCollectionContent extends Component {
this.setState({ 'loaded': e.loaded, 'total': e.total });
} });
prom = prom.then(xhr => {
this.state.manifestReader = new WBManifestReader(xhr.response.manifest_text);
this.prepareRows();
const streams = xhr.response.manifest_text.split('\n');
const paths = streams.filter(s => s).map(s => {
const n = s.indexOf(' ');
return unescapeName(s.substr(0, n));
});
let prom_1 = new Promise(accept => accept());
prom_1 = prom_1.then(() => {
const prom_2 = new Promise(accept => {
manifestWorker.onmessage = () => accept();
manifestWorker.postMessage([ 'precreatePaths', paths ]);
this.setState({
'totalStreams': streams.length,
'parsedStreams': 0,
'mode': 'manifestParse'
})
})
return prom_2;
});
for (let i = 0; i < streams.length; i++) {
prom_1 = prom_1.then(() => {
const prom_2 = new Promise(accept => {
manifestWorker.onmessage = () => accept();
manifestWorker.postMessage([ 'parseStream', streams[i] ]);
});
return prom_2;
});
prom_1 = prom_1.then(() => {
const prom_2 = new Promise(accept => {
manifestWorker.onmessage = (e) => accept(e);
manifestWorker.postMessage([ 'listDirectory', '.' + this.props.collectionPath, true ]);
if (i % 1000 === 0)
console.log(i + '/' + streams.length);
});
return prom_2;
});
prom_1 = prom_1.then(e => {
this.prepareRows(e.data[1]);
this.setState({ 'parsedStreams': (i + 1) });
});
}
prom_1 = prom_1.then(() => this.setState({
'mode': 'browsingReady'
}));
return prom_1;
});
}
componentWillReceiveProps(nextProps) {
const { manifestWorker, mode } = this.state;
const { collectionPath } = nextProps;
if (mode === 'browsingReady') {
this.state.mode = 'waitForListing';
let prom = new Promise(accept => {
manifestWorker.onmessage = (e) => accept(e);
manifestWorker.postMessage([ 'listDirectory', '.' + collectionPath ]);
});
prom = prom.then(e => {
this.state.mode = 'browsingReady';
this.prepareRows(e.data[1]);
});
}
this.props = nextProps;
this.prepareRows();
// this.prepareRows();
}
prepareRows() {
let { manifestReader } = this.state;
prepareRows(listing) {
let { manifestReader, mode } = this.state;
let { collectionPath, page, itemsPerPage } = this.props;
let { arvHost, arvToken } = this.props.app.state;
//path = path.split('/');
//path = [ '.' ].concat(path);
let listing = manifestReader.listDirectory('.' + collectionPath)
//let listing = manifestReader.listDirectory('.' + collectionPath)
const numPages = Math.ceil(listing.length / itemsPerPage);
listing = listing.slice(page * itemsPerPage,
page * itemsPerPage + itemsPerPage);
@@ -71,52 +146,73 @@ class WBCollectionContent extends Component {
item[1],
'File',
filesize(item[2]),
(<div>
<button class="btn btn-outline-primary mx-1" title="Download"
onclick={ () => {
let prom = wbDownloadFile(arvHost, arvToken, manifestReader,
'.' + collectionPath + '/' + item[1]);
prom = prom.then(blocks => {
const blob = new Blob(blocks);
const a = document.createElement('a');
a.name = item[1];
a.href = window.URL.createObjectURL(blob);
a.click();
});
} }><i class="fas fa-download"></i></button>
<button class="btn btn-outline-primary mx-1" title="View"
onclick={ () => {
let prom = wbDownloadFile(arvHost, arvToken, manifestReader,
'.' + collectionPath + '/' + item[1]);
prom = prom.then(blocks => {
const blob = new Blob(blocks);
window.open(window.URL.createObjectURL(blob));
});
} }><i class="far fa-eye"></i></button>
</div>)
( (mode === 'browsingReady') ? (
<div>
<button class="btn btn-outline-primary mx-1" title="Download"
onclick={ () => {
let prom = wbDownloadFile(arvHost, arvToken, manifestReader,
'.' + collectionPath + '/' + item[1]);
prom = prom.then(blocks => {
const blob = new Blob(blocks);
const a = document.createElement('a');
a.name = item[1];
a.href = window.URL.createObjectURL(blob);
a.click();
});
} }><i class="fas fa-download"></i></button>
<button class="btn btn-outline-primary mx-1" title="View"
onclick={ () => {
let prom = wbDownloadFile(arvHost, arvToken, manifestReader,
'.' + collectionPath + '/' + item[1]);
prom = prom.then(blocks => {
const blob = new Blob(blocks);
window.open(window.URL.createObjectURL(blob));
});
} }><i class="far fa-eye"></i></button>
</div>
) : null)
]
))
});
}
render({ collectionPath, page }, { manifestReader, rows, numPages, loaded, total }) {
render({ collectionPath, page }, { manifestReader, rows,
numPages, loaded, total, mode, parsedStreams, totalStreams }) {
return (
<div>
<WBBreadcrumbs items={ ('.' + collectionPath).split('/') } />
{ manifestReader ? (
<div>
<WBTable columns={ [ 'Name', 'Type', 'Size', 'Actions' ] }
rows={ rows } />
<WBPagination activePage={ page } numPages={ numPages }
getPageUrl={ page => this.getUrl({ 'page': page }) } />
</div>
) : (
<div>Downloading manifest: { filesize(loaded) }</div>
) }
{ (mode === 'manifestDownload') ?
(
<div class="container-fluid">
<div>Downloading manifest: { filesize(loaded) }</div>
<div class="progress">
<div class="progress-bar progress-bar-striped progress-bar-animated" role="progressbar"
aria-valuenow="100" aria-valuemin="0" aria-valuemax="100" style="width: 100%"></div>
</div>
</div>
) : (
<div>
{ mode === 'manifestParse' ? (
<div class="container-fluid mb-2">
<div>Parsing manifest: { parsedStreams }/{ totalStreams }</div>
<div class="progress">
<div class="progress-bar progress-bar-striped progress-bar-animated bg-success" role="progressbar"
aria-valuenow={ totalStreams } aria-valuemin="0" aria-valuemax={ parsedStreams } style={ 'width: ' + Math.round(parsedStreams * 100 / totalStreams) + '%' }></div>
</div>
</div>
) : null }
<WBTable columns={ [ 'Name', 'Type', 'Size', 'Actions' ] }
rows={ rows } />
<WBPagination activePage={ page } numPages={ numPages }
getPageUrl={ page => this.getUrl({ 'page': page }) } />
</div>
) }
</div>
);
}


+ 87
- 128
frontend/src/js/misc/wb-manifest-worker.js View File

@@ -1,3 +1,73 @@
const rx = /^[a-f0-9]{32}\+[0-9]+/;
const rootDir = {};
const streams = [];
/**
 * Worker entry point. Each incoming message is an array whose first element
 * is a verb and whose remaining elements are that verb's arguments. Every
 * recognised verb is answered with exactly one message whose first element
 * is the verb name followed by 'Result'.
 *
 *   [ 'precreatePaths', paths ]          -> [ 'precreatePathsResult' ]
 *   [ 'parseStream', streamText ]        -> [ 'parseStreamResult' ]
 *   [ 'listDirectory', path, lenient ]   -> [ 'listDirectoryResult', listing ]
 *
 * An unrecognised verb throws and produces no reply.
 */
onmessage = function(e) {
  const verb = e.data[0];

  if (verb === 'precreatePaths') {
    precreatePaths(e.data[1]);
    postMessage([ 'precreatePathsResult' ]);
  } else if (verb === 'parseStream') {
    parseStream(e.data[1]);
    postMessage([ 'parseStreamResult' ]);
  } else if (verb === 'listDirectory') {
    const lst = listDirectory(rootDir, e.data[1], e.data[2]);
    postMessage([ 'listDirectoryResult', lst ]);
  } else {
    throw Error('Unknown verb: ' + verb);
  }
};
/**
 * Create every given path under the worker's root directory up front.
 *
 * @param {Array} paths - paths to create, each passed unchanged to mkpath
 */
function precreatePaths(paths) {
  for (const path of paths) {
    mkpath(rootDir, path);
  }
}
/**
 * Parse one manifest stream line and merge it into the worker's state.
 *
 * A stream line is a space-separated list: the stream name, then block
 * locators (tokens matching `rx`), then file tokens of the form
 * `offset:length:name`. Each file token is appended to its file entry under
 * rootDir, and the stream's locator table is pushed onto `streams`.
 *
 * @param {string} s - one stream line; an empty value is ignored
 */
function parseStream(s) {
  if (!s) return;

  const tokens = s.split(' ');
  const streamName = unescapeName(tokens[0]);

  // Index of the first token after the stream name that is not a locator
  // (-1 when there is none).
  const firstFileIdx = tokens.findIndex((t, idx) => idx >= 1 && !rx.test(t));

  // Pair every locator with its [start, end) range within the stream.
  const locators = [];
  let offset = 0;
  for (const loc of tokens.slice(1, firstFileIdx)) {
    const size = parseInt(loc.split('+')[1], 10);
    const start = offset;
    offset += size;
    locators.push([ loc, start, offset ]);
  }

  // Consecutive tokens naming the same path extend the same file entry.
  let prevFile = null;
  let prevPath = null;
  for (const token of tokens.slice(firstFileIdx)) {
    const fields = token.split(':');
    const seg = [
      parseInt(fields[0], 10),
      parseInt(fields[1], 10),
      unescapeName(fields.slice(2).join(':'))
    ];
    const path = streamName + '/' + seg[2];

    if (path !== prevPath) {
      const dirParts = path.split('/');
      const fileName = dirParts.pop();
      const dir = mkpath(rootDir, dirParts);
      prevFile = makeFile(dir, fileName);
      prevPath = path;
    }
    // streams.length is the index this stream gets once pushed below.
    appendFile(prevFile, streams.length, seg);
  }

  streams.push(locators);
}
function mkdir(parent, name) {
if (name in parent && (parent[name] instanceof Array))
throw Error('File with the same name already exists');
@@ -31,7 +101,7 @@ function makeFile(dir, name) {
/**
 * Record one more segment on a file entry and grow its total size.
 *
 * @param {Array} f - file entry: f[0] is the segment list, f[1] the size
 * @param {number} sidx - index of the stream the segment belongs to
 * @param {Array} seg - segment whose first two elements are stored and
 *   whose second element is added to the size
 * @returns {Array} the same file entry `f`
 */
function appendFile(f, sidx, seg) {
  const first = seg[0];
  const length = seg[1];
  f[0].push([ sidx, first, length ]);
  f[1] = f[1] + length;
  return f;
}
@@ -40,142 +110,31 @@ function unescapeName(name) {
(_, $1) => ($1 === '\\\\' ? '\\' : String.fromCharCode(parseInt($1.substr(1), 8))));
}
/**
 * Build a directory tree from already tokenised streams.
 *
 * Each stream is an array of [ streamName, locators, segments ]; every
 * segment's third element is a '/'-separated file path relative to the
 * stream's directory.
 *
 * @param {Array} streams - tokenised streams
 * @returns {object} the root directory object that was populated
 */
function process(streams) {
  const rootDir = {};

  streams.forEach((stream, sidx) => {
    const [ streamName, , segments ] = stream;
    const streamDir = mkpath(rootDir, streamName);

    segments.forEach((seg) => {
      const parts = seg[2].split('/');
      // A bare file name lives directly in the stream directory; otherwise
      // the intermediate directories are created relative to it.
      const dir = (parts.length === 1)
        ? streamDir
        : mkpath(streamDir, [ '.', ...parts.slice(0, parts.length - 1) ]);
      const fileName = parts[parts.length - 1];
      appendFile(dir, fileName, sidx, seg);
    });
  });

  return rootDir;
}
/**
 * Parse a whole manifest text in a single character-by-character pass.
 *
 * The scanner is a three-state machine: it first reads the stream name, then
 * the block locators, then the file segments, and returns to the stream-name
 * state at each newline. File segments are added to a directory tree as they
 * are read.
 *
 * Tokens are only processed when a terminating ' ' or '\n' is seen, so text
 * after the last terminator is left unprocessed.
 *
 * @param {string} manifestText - the manifest text to parse
 * @returns {{rootDir: object, streams: Array}} the populated directory tree
 *   and, per stream, [ streamName, locators ]
 */
function parse(manifestText) {
// Scanner states.
const M_STREAM_NAME = 0;
const M_LOCATORS = 1;
const M_FILE_SEGMENTS = 2;
let mode = M_STREAM_NAME;
const streams = [];
let locators = [];
let streamName;
// Characters of the token currently being read.
let accum = '';
// Index where the current token started; used to rewind below.
let tokenStart = 0;
// Last file entry and its path, so repeated segments of one file reuse it.
let lastFile = null;
let lastPath = null;
const rootDir = {};
for (let i = 0; i < manifestText.length; i++) {
const c = manifestText[i];
if (mode === M_STREAM_NAME) {
if (c === ' ') {
// End of the stream name; locators follow.
mode = M_LOCATORS;
streamName = unescapeName(accum);
accum = '';
tokenStart = i + 1;
} else {
accum += c;
}
} else if (mode === M_LOCATORS) {
if (c === ':') {
// A ':' means the current token is a file segment, not a locator.
// Rewind to the token start (the loop's i++ lands on tokenStart) and
// re-read it in file-segment mode.
mode = M_FILE_SEGMENTS;
accum = '';
i = tokenStart - 1;
// Extend each [ locator, size ] with its start and end position in the stream.
let pos = 0;
locators = locators.map(loc => {
const r = loc.concat([ pos, pos + loc[1] ]);
pos += loc[1];
return r;
});
} else if (c === ' ') {
// End of a locator token; the size is the part after the first '+'.
const sz = Number(accum.split('+')[1]);
locators.push([accum, sz]);
accum = '';
tokenStart = i + 1;
} else {
accum += c;
}
} else if (mode === M_FILE_SEGMENTS) {
if (c === ' ' || c === '\n') {
// End of a file token: two numeric fields, then the name, which may
// itself contain ':'.
let seg = accum.split(':');
seg = [Number(seg[0]), Number(seg[1]), seg.slice(2).join(':')];
const path = streamName + '/' + unescapeName(seg[2]);
let f;
if (path !== lastPath) {
// New path: create its parent directories and the file entry.
let dirName = path.split('/');
const fileName = dirName[dirName.length - 1];
dirName = dirName.slice(0, dirName.length - 1);
const dir = mkpath(rootDir, dirName);
f = makeFile(dir, fileName);
lastPath = path;
lastFile = f;
} else {
f = lastFile;
}
// streams.length is the index the current stream gets when pushed below.
appendFile(f, streams.length, seg);
accum = '';
tokenStart = i + 1;
if (c === '\n') {
// End of the stream line: store it and start over with the next stream name.
streams.push([ streamName, locators ]);
locators = [];
mode = M_STREAM_NAME;
}
} else {
accum += c;
}
}
}
return { rootDir, streams };
}
/**
 * Walk `path` down from `parent` and return the object found there.
 *
 * @param {object} parent - directory object to start from
 * @param {string|Array<string>} path - '/'-separated string or array of
 *   components; the first component must be '.'
 * @param {boolean} [lenient=false] - when true, a missing component yields an
 *   empty directory object instead of an error
 * @returns {object} the directory at `path`, or a fresh `{}` when a component
 *   is missing and `lenient` is set
 * @throws {Error} if the path does not start with '.', or if a component is
 *   missing and `lenient` is false
 */
function findDir(parent, path, lenient=false) {
  const segments = (typeof(path) === 'string') ? path.split('/') : path;
  if (segments[0] !== '.')
    throw Error('Path must start with a dot (.)');

  let dir = parent;
  // Component 0 is the leading '.', which denotes `parent` itself.
  for (let i = 1; i < segments.length; i++) {
    if (!(segments[i] in dir)) {
      if (lenient)
        return {};
      throw Error('Directory not found');
    }
    dir = dir[segments[i]];
  }
  return dir;
}
class WBManifestReader {
constructor(manifestText) {
const {rootDir, streams} = parse(manifestText);
this.rootDir = rootDir;
this.streams = streams;
//this.rootDir = process(this.streams);
}
listDirectory(path) {
let dir = findDir(this.rootDir, path);
let keys = Object.keys(dir);
keys.sort();
let subdirs = keys.filter(k => !(dir[k] instanceof Array));
let files = keys.filter(k => (dir[k] instanceof Array));
let res = subdirs.map(k => [ 'd', k, null ]);
res = res.concat(files.map(k => [ 'f', k, dir[k][1] ]));
return res;
}
/**
 * List the entries of the directory at `path`.
 *
 * Entries that are arrays are treated as files, everything else as a
 * subdirectory. Subdirectories come first, then files, each group in sorted
 * key order.
 *
 * @param {object} rootDir - root directory object to resolve `path` against
 * @param {string|Array<string>} path - path handed to findDir
 * @param {boolean} [lenient=false] - forwarded to findDir
 * @returns {Array<Array>} rows of [ 'd', name, null ] for subdirectories and
 *   [ 'f', name, size ] for files, where size is the file entry's element 1
 */
function listDirectory(rootDir, path, lenient=false) {
  const dir = findDir(rootDir, path, lenient);
  const dirRows = [];
  const fileRows = [];

  for (const key of Object.keys(dir).sort()) {
    const entry = dir[key];
    if (entry instanceof Array) {
      fileRows.push([ 'f', key, entry[1] ]);
    } else {
      dirRows.push([ 'd', key, null ]);
    }
  }

  return dirRows.concat(fileRows);
}
export default WBManifestReader;

Loading…
Cancel
Save