From 3731fa54f0c932501271b587cd0da70bd8aa0999 Mon Sep 17 00:00:00 2001 From: Stanislaw Adaszewski Date: Sun, 9 Feb 2020 19:44:38 +0100 Subject: [PATCH] Starting to work on manifest worker. --- .../src/js/component/wb-collection-content.js | 2 +- .../src/js/misc/wb-collection-manifest.js | 2 +- frontend/src/js/misc/wb-manifest-reader.js | 86 +++++++-- frontend/src/js/misc/wb-manifest-worker.js | 181 ++++++++++++++++++ 4 files changed, 253 insertions(+), 18 deletions(-) create mode 100644 frontend/src/js/misc/wb-manifest-worker.js diff --git a/frontend/src/js/component/wb-collection-content.js b/frontend/src/js/component/wb-collection-content.js index 715c413..4aa8cdb 100644 --- a/frontend/src/js/component/wb-collection-content.js +++ b/frontend/src/js/component/wb-collection-content.js @@ -2,7 +2,7 @@ import { h, Component } from 'preact'; import WBTable from 'wb-table'; import WBBreadcrumbs from 'wb-breadcrumbs'; import { WBManifestReader } from 'wb-collection-manifest'; -// import WBManifestReader from 'wb-manifest-reader'; +//import WBManifestReader from 'wb-manifest-reader'; import WBPagination from 'wb-pagination'; import makeArvadosRequest from 'make-arvados-request'; import wbDownloadFile from 'wb-download-file'; diff --git a/frontend/src/js/misc/wb-collection-manifest.js b/frontend/src/js/misc/wb-collection-manifest.js index 5454d54..6734995 100644 --- a/frontend/src/js/misc/wb-collection-manifest.js +++ b/frontend/src/js/misc/wb-collection-manifest.js @@ -43,7 +43,7 @@ class WBManifestReader { dir[fileName] = [[], 0]; if (!(dir[fileName] instanceof Array)) throw Error('Conflict trying to create a file - a directory with the same name already exists: ' + fileName); - this.appendReferences(dir[fileName], locators, position, size); + //this.appendReferences(dir[fileName], locators, position, size); } appendReferences(file, locators, position, size) { diff --git a/frontend/src/js/misc/wb-manifest-reader.js b/frontend/src/js/misc/wb-manifest-reader.js index 7b41720..cc7fbd7 100644 --- a/frontend/src/js/misc/wb-manifest-reader.js +++ b/frontend/src/js/misc/wb-manifest-reader.js @@ -18,16 +18,28 @@ function mkpath(parent, path) { return dir; } -function appendFile(dir, name, sidx, seg) { - if (name in dir && (!(dir[name] instanceof Array))) - throw Error('Directory with the same name already exists'); - if (!(name in dir)) - dir[name] = []; - const f = dir[name]; - f.push([ sidx, seg[0], seg[1] ]); +function makeFile(dir, name) { + if (name in dir) { + if (!(dir[name] instanceof Array)) + throw Error('Directory with the same name already exists'); + return dir[name]; + } + const f = [[], 0]; + dir[name] = f; return f; } +function appendFile(f, sidx, seg) { + f[0].push([ sidx, seg[0], seg[1] ]); + //f[1] += seg[1]; + return f; +} + +function unescapeName(name) { + return name.replace(/(\\\\|\\[0-9]{3})/g, + (_, $1) => ($1 === '\\\\' ? '\\' : String.fromCharCode(parseInt($1.substr(1), 8)))); +} + function process(streams) { const rootDir = {}; @@ -55,19 +67,22 @@ function parse(manifestText) { const streams = []; let locators = []; - let segments = []; let streamName; let accum = ''; let tokenStart = 0; + let lastFile = null; + let lastPath = null; + const rootDir = {}; + for (let i = 0; i < manifestText.length; i++) { const c = manifestText[i]; if (mode === M_STREAM_NAME) { if (c === ' ') { mode = M_LOCATORS; - streamName = accum; + streamName = unescapeName(accum); accum = ''; tokenStart = i + 1; } else { @@ -97,14 +112,26 @@ function parse(manifestText) { } else if (mode === M_FILE_SEGMENTS) { if (c === ' ' || c === '\n') { let seg = accum.split(':'); - seg = [Number(seg[0]), Number(seg[1]), seg[2]]; - segments.push(seg); + seg = [Number(seg[0]), Number(seg[1]), seg.slice(2).join(':')]; + const path = streamName + '/' + unescapeName(seg[2]); + let f; + if (path !== lastPath) { + let dirName = path.split('/'); + const fileName = dirName[dirName.length - 1]; + dirName = dirName.slice(0, dirName.length - 1); + const dir = mkpath(rootDir, dirName); + f = makeFile(dir, fileName); + lastPath = path; + lastFile = f; + } else { + f = lastFile; + } + appendFile(f, streams.length, seg); accum = ''; tokenStart = i + 1; if (c === '\n') { - streams.push([streamName, locators, segments]); + streams.push([ streamName, locators ]); locators = []; - segments = []; mode = M_STREAM_NAME; } } else { @@ -114,13 +141,40 @@ function parse(manifestText) { } } - return streams; + return { rootDir, streams }; +} + +function findDir(parent, path) { + if (typeof(path) === 'string') + path = path.split('/'); + if (path[0] !== '.') + throw Error('Path must start with a dot (.)'); + let dir = parent; + for (let i = 1; i < path.length; i++) { + if (!(path[i] in dir)) + throw Error('Directory not found'); + dir = dir[path[i]]; + } + return dir; } class WBManifestReader { constructor(manifestText) { - this.streams = parse(manifestText); - this.rootDir = process(this.streams); + const {rootDir, streams} = parse(manifestText); + this.rootDir = rootDir; + this.streams = streams; + //this.rootDir = process(this.streams); + } + + listDirectory(path) { + let dir = findDir(this.rootDir, path); + let keys = Object.keys(dir); + keys.sort(); + let subdirs = keys.filter(k => !(dir[k] instanceof Array)); + let files = keys.filter(k => (dir[k] instanceof Array)); + let res = subdirs.map(k => [ 'd', k, null ]); + res = res.concat(files.map(k => [ 'f', k, dir[k][1] ])); + return res; } } diff --git a/frontend/src/js/misc/wb-manifest-worker.js b/frontend/src/js/misc/wb-manifest-worker.js new file mode 100644 index 0000000..cc7fbd7 --- /dev/null +++ b/frontend/src/js/misc/wb-manifest-worker.js @@ -0,0 +1,181 @@ +function mkdir(parent, name) { + if (name in parent && (parent[name] instanceof Array)) + throw Error('File with the same name already exists'); + if (name in parent) + return parent[name]; + const dir = {}; + parent[name] = dir; + return dir; +} + +function mkpath(parent, path) { + if (typeof(path) === 'string') + path = path.split('/'); + let dir = parent; + for (let i = 1; i < path.length; i++) { + dir = mkdir(dir, path[i]); + } + return dir; +} + +function makeFile(dir, name) { + if (name in dir) { + if (!(dir[name] instanceof Array)) + throw Error('Directory with the same name already exists'); + return dir[name]; + } + const f = [[], 0]; + dir[name] = f; + return f; +} + +function appendFile(f, sidx, seg) { + f[0].push([ sidx, seg[0], seg[1] ]); + //f[1] += seg[1]; + return f; +} + +function unescapeName(name) { + return name.replace(/(\\\\|\\[0-9]{3})/g, + (_, $1) => ($1 === '\\\\' ? '\\' : String.fromCharCode(parseInt($1.substr(1), 8)))); +} + +function process(streams) { + const rootDir = {}; + + streams.map((s, sidx) => { + const [ streamName, locators, segments ] = s; + const streamDir = mkpath(rootDir, streamName); + segments.map((seg, segidx) => { + let name = seg[2].split('/'); + const dir = (name.length === 1 ? streamDir : + mkpath(streamDir, ['.'].concat(name.slice(0, name.length - 1)))); + name = name[name.length - 1]; + appendFile(dir, name, sidx, seg); + }); + }); + + return rootDir; +} + +function parse(manifestText) { + const M_STREAM_NAME = 0; + const M_LOCATORS = 1; + const M_FILE_SEGMENTS = 2; + + let mode = M_STREAM_NAME; + + const streams = []; + let locators = []; + + let streamName; + let accum = ''; + let tokenStart = 0; + + let lastFile = null; + let lastPath = null; + const rootDir = {}; + + for (let i = 0; i < manifestText.length; i++) { + const c = manifestText[i]; + + if (mode === M_STREAM_NAME) { + if (c === ' ') { + mode = M_LOCATORS; + streamName = unescapeName(accum); + accum = ''; + tokenStart = i + 1; + } else { + accum += c; + } + + } else if (mode === M_LOCATORS) { + if (c === ':') { + mode = M_FILE_SEGMENTS; + accum = ''; + i = tokenStart - 1; + let pos = 0; + locators = locators.map(loc => { + const r = loc.concat([ pos, pos + loc[1] ]); + pos += loc[1]; + return r; + }); + } else if (c === ' ') { + const sz = Number(accum.split('+')[1]); + locators.push([accum, sz]); + accum = ''; + tokenStart = i + 1; + } else { + accum += c; + } + + } else if (mode === M_FILE_SEGMENTS) { + if (c === ' ' || c === '\n') { + let seg = accum.split(':'); + seg = [Number(seg[0]), Number(seg[1]), seg.slice(2).join(':')]; + const path = streamName + '/' + unescapeName(seg[2]); + let f; + if (path !== lastPath) { + let dirName = path.split('/'); + const fileName = dirName[dirName.length - 1]; + dirName = dirName.slice(0, dirName.length - 1); + const dir = mkpath(rootDir, dirName); + f = makeFile(dir, fileName); + lastPath = path; + lastFile = f; + } else { + f = lastFile; + } + appendFile(f, streams.length, seg); + accum = ''; + tokenStart = i + 1; + if (c === '\n') { + streams.push([ streamName, locators ]); + locators = []; + mode = M_STREAM_NAME; + } + } else { + accum += c; + } + + } + } + + return { rootDir, streams }; +} + +function findDir(parent, path) { + if (typeof(path) === 'string') + path = path.split('/'); + if (path[0] !== '.') + throw Error('Path must start with a dot (.)'); + let dir = parent; + for (let i = 1; i < path.length; i++) { + if (!(path[i] in dir)) + throw Error('Directory not found'); + dir = dir[path[i]]; + } + return dir; +} + +class WBManifestReader { + constructor(manifestText) { + const {rootDir, streams} = parse(manifestText); + this.rootDir = rootDir; + this.streams = streams; + //this.rootDir = process(this.streams); + } + + listDirectory(path) { + let dir = findDir(this.rootDir, path); + let keys = Object.keys(dir); + keys.sort(); + let subdirs = keys.filter(k => !(dir[k] instanceof Array)); + let files = keys.filter(k => (dir[k] instanceof Array)); + let res = subdirs.map(k => [ 'd', k, null ]); + res = res.concat(files.map(k => [ 'f', k, dir[k][1] ])); + return res; + } +} + +export default WBManifestReader;