// Reconstructed from a git patch whose line breaks had been collapsed:
//   commit  e938d9f93ce49be8ee7696dcb2ebdc9ea2cb8f67
//   author  Stanislaw Adaszewski, Sun, 9 Feb 2020 18:18:07 +0100
//   subject [PATCH] Working on a more efficient manifest reader.
// The patch creates frontend/src/js/misc/wb-manifest-reader.js (this module)
// and adds a single, commented-out line to
// frontend/src/js/component/wb-collection-content.js:
//   // import WBManifestReader from 'wb-manifest-reader';

/**
 * Own-property test that ignores names inherited from Object.prototype
 * (e.g. "constructor", "toString"), which `name in obj` would report as present.
 *
 * @param {Object} obj - Directory object to inspect.
 * @param {string} key - Entry name.
 * @returns {boolean} True when `key` is an own property of `obj`.
 */
function hasOwn(obj, key) {
  return Object.prototype.hasOwnProperty.call(obj, key);
}

/**
 * Creates or overwrites an own, enumerable data property.
 * Plain assignment is not used because `obj['__proto__'] = value` would
 * replace the prototype instead of storing an entry named "__proto__".
 *
 * @param {Object} obj - Directory object to modify.
 * @param {string} key - Entry name.
 * @param {*} value - Entry value (directory object or file segment array).
 */
function setOwn(obj, key, value) {
  Object.defineProperty(obj, key, {
    value,
    writable: true,
    enumerable: true,
    configurable: true,
  });
}

/**
 * Returns the sub-directory `name` of `parent`, creating it when missing.
 * Directories are plain objects; files are arrays (see appendFile).
 *
 * @param {Object} parent - Directory to look in.
 * @param {string} name - Sub-directory name.
 * @returns {Object} The existing or newly created directory.
 * @throws {Error} When `name` already refers to a file.
 */
function mkdir(parent, name) {
  if (hasOwn(parent, name)) {
    const existing = parent[name];
    if (Array.isArray(existing)) {
      throw new Error('File with the same name already exists');
    }
    return existing;
  }
  const dir = {};
  setOwn(parent, name, dir);
  return dir;
}

/**
 * Walks (and creates as needed) a chain of directories below `parent`.
 * The first path component is always skipped; callers in this module pass
 * paths whose first component is "." (stream names, or ['.', ...dirs]).
 *
 * @param {Object} parent - Directory the path is relative to.
 * @param {string|string[]} path - "/"-separated path or pre-split components.
 * @returns {Object} The directory at the end of the path.
 * @throws {Error} When a component already refers to a file.
 */
function mkpath(parent, path) {
  const parts = typeof path === 'string' ? path.split('/') : path;
  let dir = parent;
  for (let i = 1; i < parts.length; i++) {
    dir = mkdir(dir, parts[i]);
  }
  return dir;
}

/**
 * Appends one segment reference to the file `name` in `dir`, creating the
 * file (an array) on first use.
 *
 * @param {Object} dir - Directory that holds the file.
 * @param {string} name - File name (no "/" components).
 * @param {number} sidx - Index of the stream the segment belongs to.
 * @param {Array} seg - Parsed segment; its first two fields are stored.
 * @returns {Array[]} The file's list of [sidx, seg[0], seg[1]] entries.
 * @throws {Error} When `name` already refers to a directory.
 */
function appendFile(dir, name, sidx, seg) {
  if (!hasOwn(dir, name)) {
    setOwn(dir, name, []);
  }
  const file = dir[name];
  if (!Array.isArray(file)) {
    throw new Error('Directory with the same name already exists');
  }
  file.push([sidx, seg[0], seg[1]]);
  return file;
}

/**
 * Builds the directory tree from parsed streams.
 * (Named buildTree rather than `process` so it does not shadow the Node
 * global of that name; the function is module-private.)
 *
 * @param {Array[]} streams - Output of parse(): [streamName, locators, segments].
 * @returns {Object} Root directory; nested objects are directories and arrays
 *   are files made of [streamIndex, seg[0], seg[1]] entries.
 * @throws {Error} When a name is used both as a file and as a directory.
 */
function buildTree(streams) {
  const rootDir = {};

  streams.forEach(([streamName, , segments], sidx) => {
    const streamDir = mkpath(rootDir, streamName);
    for (const seg of segments) {
      const parts = seg[2].split('/');
      const fileName = parts.pop();
      // mkpath skips the first component, hence the leading '.' placeholder.
      const dir = parts.length === 0 ? streamDir : mkpath(streamDir, ['.', ...parts]);
      appendFile(dir, fileName, sidx, seg);
    }
  });

  return rootDir;
}

/**
 * Parses one file segment token of the form "<number>:<number>:<name>".
 * Everything after the second colon is the name, so names may contain ":".
 * NOTE(review): the two numbers are presumably the position and size of the
 * segment within its stream - confirm against the manifest format spec.
 *
 * @param {string} token - Raw segment token.
 * @returns {Array} [Number(first), Number(second), name].
 * @throws {Error} When the token has fewer than three ":"-separated fields.
 */
function parseSegment(token) {
  const [first, second, ...nameParts] = token.split(':');
  if (nameParts.length === 0) {
    throw new Error(`Malformed file segment: ${token}`);
  }
  return [Number(first), Number(second), nameParts.join(':')];
}

/**
 * Parses one manifest line: stream name, then locator tokens, then file
 * segment tokens. The first token containing ":" starts the file segments.
 *
 * @param {string} line - One non-blank manifest line without its newline.
 * @returns {Array} [streamName, locators, segments] where each locator is
 *   [token, size, start, end]: size is the number after the first "+", and
 *   start/end are running offsets accumulated over the line's locators.
 * @throws {Error} When a file segment token is malformed.
 */
function parseStream(line) {
  // Dropping empty tokens tolerates doubled or trailing spaces.
  const tokens = line.split(' ').filter((token) => token !== '');
  const streamName = tokens[0];

  const locators = [];
  let pos = 0;
  let i = 1;
  for (; i < tokens.length && !tokens[i].includes(':'); i++) {
    const size = Number(tokens[i].split('+')[1]);
    locators.push([tokens[i], size, pos, pos + size]);
    pos += size;
  }

  const segments = tokens.slice(i).map(parseSegment);
  return [streamName, locators, segments];
}

/**
 * Parses manifest text into a list of streams, one per non-blank line.
 * A trailing newline is optional, and blank lines are ignored.
 *
 * @param {string} manifestText - Newline-separated manifest.
 * @returns {Array[]} One [streamName, locators, segments] tuple per line.
 * @throws {Error} When a file segment token is malformed.
 */
function parse(manifestText) {
  const streams = [];
  for (const line of manifestText.split('\n')) {
    if (line.trim() !== '') {
      streams.push(parseStream(line));
    }
  }
  return streams;
}

/**
 * Reads a collection manifest and exposes both the raw streams and a
 * directory tree view of the files they describe.
 */
class WBManifestReader {
  /**
   * @param {string} manifestText - Manifest to parse.
   * @throws {Error} When the manifest is malformed or a name is used both as
   *   a file and as a directory.
   */
  constructor(manifestText) {
    /** Parsed streams: [streamName, locators, segments] per manifest line. */
    this.streams = parse(manifestText);
    /** Directory tree: objects are directories, arrays are files. */
    this.rootDir = buildTree(this.streams);
  }
}

export default WBManifestReader;