230 lines
6.8 KiB
TypeScript
230 lines
6.8 KiB
TypeScript
|
import { CSVParseParam } from "./Parameters";
|
||
|
import { Converter } from "./Converter";
|
||
|
import { Fileline } from "./fileline";
|
||
|
import getEol from "./getEol";
|
||
|
import { filterArray, trimLeft, trimRight } from "./util";
|
||
|
|
||
|
const defaulDelimiters = [",", "|", "\t", ";", ":"];
|
||
|
export class RowSplit {
|
||
|
private quote: string;
|
||
|
private trim: boolean;
|
||
|
private escape: string;
|
||
|
private cachedRegExp: { [key: string]: RegExp } = {};
|
||
|
private delimiterEmitted = false;
|
||
|
private _needEmitDelimiter?: boolean = undefined;
|
||
|
private get needEmitDelimiter() {
|
||
|
if (this._needEmitDelimiter === undefined) {
|
||
|
this._needEmitDelimiter = this.conv.listeners("delimiter").length > 0;
|
||
|
}
|
||
|
return this._needEmitDelimiter;
|
||
|
}
|
||
|
constructor(private conv: Converter) {
|
||
|
this.quote = conv.parseParam.quote;
|
||
|
this.trim = conv.parseParam.trim;
|
||
|
this.escape = conv.parseParam.escape;
|
||
|
}
|
||
|
parse(fileline: Fileline): RowSplitResult {
|
||
|
if (fileline.length === 0 || (this.conv.parseParam.ignoreEmpty && fileline.trim().length === 0)) {
|
||
|
return { cells: [], closed: true };
|
||
|
}
|
||
|
const quote = this.quote;
|
||
|
const trim = this.trim;
|
||
|
const escape = this.escape;
|
||
|
if (this.conv.parseRuntime.delimiter instanceof Array || this.conv.parseRuntime.delimiter.toLowerCase() === "auto") {
|
||
|
this.conv.parseRuntime.delimiter = this.getDelimiter(fileline);
|
||
|
|
||
|
}
|
||
|
if (this.needEmitDelimiter && !this.delimiterEmitted) {
|
||
|
this.conv.emit("delimiter", this.conv.parseRuntime.delimiter);
|
||
|
this.delimiterEmitted = true;
|
||
|
}
|
||
|
const delimiter = this.conv.parseRuntime.delimiter;
|
||
|
const rowArr = fileline.split(delimiter);
|
||
|
if (quote === "off") {
|
||
|
if (trim) {
|
||
|
for (let i = 0; i < rowArr.length; i++) {
|
||
|
rowArr[i] = rowArr[i].trim();
|
||
|
}
|
||
|
}
|
||
|
return { cells: rowArr, closed: true };
|
||
|
} else {
|
||
|
return this.toCSVRow(rowArr, trim, quote, delimiter);
|
||
|
}
|
||
|
|
||
|
}
|
||
|
private toCSVRow(rowArr: string[], trim: boolean, quote: string, delimiter: string): RowSplitResult {
|
||
|
const row: string[] = [];
|
||
|
let inquote = false;
|
||
|
let quoteBuff = '';
|
||
|
for (let i = 0, rowLen = rowArr.length; i < rowLen; i++) {
|
||
|
let e = rowArr[i];
|
||
|
if (!inquote && trim) {
|
||
|
e = trimLeft(e);
|
||
|
}
|
||
|
const len = e.length;
|
||
|
if (!inquote) {
|
||
|
if (len === 2 && e === this.quote + this.quote) {
|
||
|
row.push("");
|
||
|
continue;
|
||
|
} else if (this.isQuoteOpen(e)) { //quote open
|
||
|
e = e.substr(1);
|
||
|
if (this.isQuoteClose(e)) { //quote close
|
||
|
e = e.substring(0, e.lastIndexOf(quote));
|
||
|
e = this.escapeQuote(e);
|
||
|
row.push(e);
|
||
|
continue;
|
||
|
} else if (e.indexOf(quote) !== -1) {
|
||
|
let count = 0;
|
||
|
for (const c of e) {
|
||
|
if (c === quote) {
|
||
|
count++;
|
||
|
}
|
||
|
}
|
||
|
if (count % 2 === 1) {
|
||
|
if (trim) {
|
||
|
e = trimRight(e);
|
||
|
}
|
||
|
row.push(quote + e);
|
||
|
continue;
|
||
|
}else{
|
||
|
inquote = true;
|
||
|
quoteBuff += e;
|
||
|
continue;
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
inquote = true;
|
||
|
quoteBuff += e;
|
||
|
continue;
|
||
|
}
|
||
|
} else {
|
||
|
if (trim) {
|
||
|
e = trimRight(e);
|
||
|
}
|
||
|
row.push(e);
|
||
|
continue;
|
||
|
}
|
||
|
} else { //previous quote not closed
|
||
|
if (this.isQuoteClose(e)) { //close double quote
|
||
|
inquote = false;
|
||
|
e = e.substr(0, len - 1);
|
||
|
quoteBuff += delimiter + e;
|
||
|
quoteBuff = this.escapeQuote(quoteBuff);
|
||
|
if (trim) {
|
||
|
quoteBuff = trimRight(quoteBuff);
|
||
|
}
|
||
|
row.push(quoteBuff);
|
||
|
quoteBuff = "";
|
||
|
} else {
|
||
|
quoteBuff += delimiter + e;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// if (!inquote && param._needFilterRow) {
|
||
|
// row = filterRow(row, param);
|
||
|
// }
|
||
|
|
||
|
return { cells: row, closed: !inquote };
|
||
|
}
|
||
|
private getDelimiter(fileline: Fileline): string {
|
||
|
let checker;
|
||
|
if (this.conv.parseParam.delimiter === "auto") {
|
||
|
checker = defaulDelimiters;
|
||
|
} else if (this.conv.parseParam.delimiter instanceof Array) {
|
||
|
checker = this.conv.parseParam.delimiter;
|
||
|
} else {
|
||
|
return this.conv.parseParam.delimiter;
|
||
|
}
|
||
|
let count = 0;
|
||
|
let rtn = ",";
|
||
|
checker.forEach(function (delim) {
|
||
|
const delimCount = fileline.split(delim).length;
|
||
|
if (delimCount > count) {
|
||
|
rtn = delim;
|
||
|
count = delimCount;
|
||
|
}
|
||
|
});
|
||
|
return rtn;
|
||
|
}
|
||
|
private isQuoteOpen(str: string): boolean {
|
||
|
const quote = this.quote;
|
||
|
const escape = this.escape;
|
||
|
return str[0] === quote && (
|
||
|
str[1] !== quote ||
|
||
|
str[1] === escape && (str[2] === quote || str.length === 2));
|
||
|
}
|
||
|
private isQuoteClose(str: string): boolean {
|
||
|
const quote = this.quote;
|
||
|
const escape = this.escape;
|
||
|
if (this.conv.parseParam.trim) {
|
||
|
str = trimRight(str);
|
||
|
}
|
||
|
let count = 0;
|
||
|
let idx = str.length - 1;
|
||
|
while (str[idx] === quote || str[idx] === escape) {
|
||
|
idx--;
|
||
|
count++;
|
||
|
}
|
||
|
return count % 2 !== 0;
|
||
|
}
|
||
|
|
||
|
// private twoDoubleQuote(str: string): string {
|
||
|
// var twoQuote = this.quote + this.quote;
|
||
|
// var curIndex = -1;
|
||
|
// while ((curIndex = str.indexOf(twoQuote, curIndex)) > -1) {
|
||
|
// str = str.substring(0, curIndex) + str.substring(++curIndex);
|
||
|
// }
|
||
|
// return str;
|
||
|
// }
|
||
|
|
||
|
|
||
|
private escapeQuote(segment: string): string {
|
||
|
const key = "es|" + this.quote + "|" + this.escape;
|
||
|
if (this.cachedRegExp[key] === undefined) {
|
||
|
this.cachedRegExp[key] = new RegExp('\\' + this.escape + '\\' + this.quote, 'g');
|
||
|
}
|
||
|
const regExp = this.cachedRegExp[key];
|
||
|
// console.log(regExp,segment);
|
||
|
return segment.replace(regExp, this.quote);
|
||
|
}
|
||
|
parseMultiLines(lines: Fileline[]): MultipleRowResult {
|
||
|
const csvLines: string[][] = [];
|
||
|
let left = "";
|
||
|
while (lines.length) {
|
||
|
const line = left + lines.shift();
|
||
|
const row = this.parse(line);
|
||
|
if (row.cells.length === 0 && this.conv.parseParam.ignoreEmpty) {
|
||
|
continue;
|
||
|
}
|
||
|
if (row.closed || this.conv.parseParam.alwaysSplitAtEOL) {
|
||
|
if (this.conv.parseRuntime.selectedColumns) {
|
||
|
csvLines.push(filterArray(row.cells, this.conv.parseRuntime.selectedColumns));
|
||
|
} else {
|
||
|
csvLines.push(row.cells);
|
||
|
}
|
||
|
|
||
|
left = "";
|
||
|
} else {
|
||
|
left = line + (getEol(line, this.conv.parseRuntime) || "\n");
|
||
|
}
|
||
|
}
|
||
|
return { rowsCells: csvLines, partial: left };
|
||
|
}
|
||
|
}
|
||
|
export interface MultipleRowResult {
|
||
|
rowsCells: string[][];
|
||
|
partial: string;
|
||
|
}
|
||
|
export interface RowSplitResult {
|
||
|
/**
|
||
|
* csv row array. ["a","b","c"]
|
||
|
*/
|
||
|
cells: string[],
|
||
|
/**
|
||
|
* if the passed fileline is a complete row
|
||
|
*/
|
||
|
closed: boolean
|
||
|
}
|
||
|
|