import {DateTime} from "luxon"
import * as U from '../Utils'
import * as STT from "../StypeTools"
import * as Colors from "../Colors"
import {DataValue} from "../Concepts/Basic"
import {ParsedType, SemanticType} from "../Concepts/SemanticType"
import Iconv from "iconv-lite"
import {isSafari} from 'react-device-detect'
import $t from "../i18n/i18n";

// Encoding names that can be assigned to `ParsingParams.encoding`.
// See the iconv-lite list of supported encodings:
// https://github.com/ashtuchkin/iconv-lite/wiki/Supported-Encodings
export const encodings = ['utf-8', 'utf-16be', 'utf-16le', 'windows-1251', 'ibm866', 'iso-8859-2', 'iso-8859-3', 'iso-8859-4', 'iso-8859-5', 'iso-8859-6', 'iso-8859-7', 'iso-8859-8', 'iso-8859-10', 'iso-8859-13', 'iso-8859-14', 'iso-8859-15', 'iso-8859-16', 'koi8-r', 'koi8-u', 'windows-874', 'windows-1250', 'windows-1252', 'windows-1253', 'windows-1254', 'windows-1255', 'windows-1256', 'windows-1257', 'windows-1258', 'windows-949', 'macintosh', 'mac-cyrillic', 'mac-ukraine', 'mac-croatian', 'mac-greek', 'mac-iceland', 'mac-roman', 'mac-romania', 'mac-thai', 'mac-turkish', 'mac-centeuro', 'gbk', 'gb18030', 'gb2312', 'big5', 'euc-jp', 'shift-jis', 'euc-kr', 'ks-c-5601', 'utf-32be', 'utf-32le'] as const

// Union of the literal names listed in `encodings` (derived via `as const`).
export type Encoding = typeof encodings[number]

// Pairs of [locale code, legacy encoding].
// NOTE(review): the consumer is outside this part of the file; presumably used to suggest
// an encoding from the user's locale when byte-level detection is not confident — confirm.
const localeToEncoding: [string, Encoding][] = [["ar", "windows-1256"], ["ba", "windows-1251"], ["be", "windows-1251"], ["bg", "windows-1251"], ["cs", "windows-1250"], ["el", "iso-8859-7"], ["et", "windows-1257"], ["fa", "windows-1256"], ["he", "windows-1255"], ["hr", "windows-1250"], ["hu", "iso-8859-2"], ["ja", "shift-jis"], ["kk", "windows-1251"], ["ko", "euc-kr"], ["ku", "windows-1254"], ["ky", "windows-1251"], ["lt", "windows-1257"], ["lv", "windows-1257"], ["mk", "windows-1251"], ["pl", "iso-8859-2"], ["ru", "windows-1251"], ["sah", "windows-1251"], ["sk", "windows-1250"], ["sl", "iso-8859-2"], ["sr", "windows-1251"], ["tg", "windows-1251"], ["th", "windows-874"], ["tr", "windows-1254"], ["tt", "windows-1251"], ["uk", "windows-1251"], ["vi", "windows-1258"], ["zh-CN", "gb18030"], ["zh-TW", "big5"]]

// Textual tokens treated as a null value; `TypeDetector.nullRexExp` matches them case-insensitively.
export const nullValues = ["null", "nan", "none", "undefined"]

// Both aliases are plain `Uint8Array`; the names only document whether the buffer is expected
// to still carry a leading byte-order mark. The distinction is not enforced by the type system.
export type DataWithoutBOM = Uint8Array
export type DataWithBOM = Uint8Array

// Encoding families distinguished by `detectEncodingType`.
// Numbering starts at 1 so that every member is truthy: `detectEncodingType` uses
// `!encodingType` to test whether a family has been chosen yet.
enum EncodingType {
    ASCII = 1,
    UTF8,
    UTF16_BE,
    UTF16_LE,
    UTF32_BE,
    UTF32_LE
}

// Carriage return + line feed (CRLF) line terminator.
export const UniversalEOL = "\r\n"

// Low-level settings for turning file content into rows of string values.
// NOTE(review): the parser consuming these fields is outside this part of the file;
// the per-field notes below are inferred from the field names — confirm against the parser.
export interface ParsingParams {
    // one of the names listed in `encodings`
    encoding: Encoding
    // presumably the delimiter between values on a line
    valueSeparator: string,
    // presumably the delimiter between lines/records
    lineSeparator: string,
    // presumably the quote character wrapping values
    textQualifier: string,
    // presumably the character escaping the qualifier inside values
    escapeChar: string,
}

// Semantic type information for one imported column.
export interface ImportedColumnType {
    // explicitly chosen type; when defined it wins over `recommended` (see `actualColumnType`)
    manual: SemanticType | undefined
    // candidate types for the column (how the list is built is not visible in this part of the file)
    possible: SemanticType[]
    // type used when `manual` is undefined (see `actualColumnType`)
    recommended: SemanticType
}

// Column-level import settings; this is the return type of `TypeDetector.getColumnParams`.
export interface ColumnParams {
    // header row count used when `manualHeaderRows` is undefined (see `actualHeaderRows`)
    guessedHeaderRows: number
    // explicitly set header row count; wins over the guess (see `actualHeaderRows`)
    manualHeaderRows: number | undefined
    columnTypes: ImportedColumnType[],
    // NOTE(review): the numeric key is presumably the column index — confirm
    columnAffixes: Map<number, { prefix: string | undefined, postfix: string | undefined }>
}

// Complete set of import settings: parsing settings plus column settings plus detection metadata.
export interface ImportParams extends ParsingParams, ColumnParams {
    // NOTE(review): `bomByteCount` and `confident` share their names with the fields returned
    // by `detectEncodingType`; presumably they are copied from that result — confirm
    bomByteCount: number
    confident: boolean
    // purpose is not visible in this part of the file
    workspace?: boolean
}

/** Resolves the effective type of a column: the manual choice when present, otherwise the recommended type. */
export const actualColumnType = (type: ImportedColumnType) => {
    const {manual, recommended} = type
    return manual ?? recommended
}

/** Resolves the effective number of header rows: the manual value when present (including 0), otherwise the guess. */
export const actualHeaderRows = (params: ImportParams) => {
    const {manualHeaderRows, guessedHeaderRows} = params
    return manualHeaderRows ?? guessedHeaderRows
}

/**
 * Checks whether the whole buffer is a well-formed UTF-8 byte sequence.
 *
 * Based on https://stackoverflow.com/questions/1031645/how-to-detect-utf-8-in-plain-c
 * with a relaxed ASCII range: every 7-bit byte except NUL is accepted, i.e. control
 * characters including DEL (0x7F) are allowed. Overlong encodings, UTF-16 surrogates
 * (U+D800..U+DFFF), code points above U+10FFFF and truncated sequences are rejected.
 *
 * @param data bytes to validate
 * @returns `true` when every byte belongs to a valid sequence (also for an empty buffer)
 */
function isUTF8(data: DataWithoutBOM): boolean {
    // true when `index` is inside the buffer and the byte there lies in [low, high]
    const between = (index: number, low: number, high: number): boolean =>
        index < data.length && low <= data[index] && data[index] <= high
    // true when `index` holds a continuation byte (10xxxxxx)
    const continuation = (index: number): boolean => between(index, 0x80, 0xBF)

    let i = 0
    while (i < data.length) {
        const lead = data[i]

        // ASCII; a stricter option would be 0x09 | 0x0A | 0x0D | 0x20..0x7E.
        // NUL stays rejected, every other 7-bit byte (control characters and DEL) is allowed.
        if (lead > 0 && lead <= 0x7F) {
            i += 1
            continue
        }

        // non-overlong 2-byte sequence
        if (0xC2 <= lead && lead <= 0xDF && continuation(i + 1)) {
            i += 2
            continue
        }

        // 3-byte sequence: the allowed range of the second byte depends on the lead byte
        const threeByteStart =
            (lead === 0xE0 && between(i + 1, 0xA0, 0xBF)) // excluding overlongs
            || (((0xE1 <= lead && lead <= 0xEC) || lead === 0xEE || lead === 0xEF) && continuation(i + 1)) // straight 3-byte
            || (lead === 0xED && between(i + 1, 0x80, 0x9F)) // excluding surrogates
        if (threeByteStart && continuation(i + 2)) {
            i += 3
            continue
        }

        // 4-byte sequence: the allowed range of the second byte depends on the lead byte
        const fourByteStart =
            (lead === 0xF0 && between(i + 1, 0x90, 0xBF)) // planes 1-3
            || (0xF1 <= lead && lead <= 0xF3 && continuation(i + 1)) // planes 4-15
            || (lead === 0xF4 && between(i + 1, 0x80, 0x8F)) // plane 16
        if (fourByteStart && continuation(i + 2) && continuation(i + 3)) {
            i += 4
            continue
        }

        return false
    }
    return true
}

/**
 * Guesses the encoding family of a byte buffer.
 *
 * Checks are applied in this order and the first match wins:
 *  1. byte-order mark (UTF-16 BE/LE, UTF-8, UTF-32 BE/LE) — confident;
 *  2. strictly printable ASCII plus TAB/LF/CR — confident;
 *  3. BOM-less UTF-32, judged by the two high-order bytes of every 4-byte group — not confident;
 *  4. BOM-less UTF-16, judged by zero bytes and by the byte order of line breaks —
 *     confident only when one half of every 2-byte unit is always zero;
 *  5. well-formed UTF-8 (see `isUTF8`) — confident.
 * When nothing matches, or the buffer is empty, ASCII is returned with `confident: false`.
 *
 * @param data file bytes, possibly starting with a byte-order mark
 * @returns the guessed family, the number of leading BOM bytes (0 when there is no BOM)
 *          and whether the guess is considered reliable
 */
function detectEncodingType(data: DataWithBOM): { encodingType: EncodingType, bomByteCount: number, confident: boolean } {
    // BOM: https://en.wikipedia.org/wiki/Byte_order_mark

    if (!data.length) {
        return {encodingType: EncodingType.ASCII, bomByteCount: 0, confident: false}
    }

    let bomByteCount = 0, confident = false, encodingType: EncodingType | undefined

    // detect BOM
    if (data[0] === 0xFE && data[1] === 0xFF) {
        bomByteCount = 2
        encodingType = EncodingType.UTF16_BE
        confident = true
    } else if (data[0] === 0xFF && data[1] === 0xFE && (data[2] !== 0 || data[3] !== 0)) {
        // FF FE followed by 00 00 is the UTF-32 LE mark, handled below
        bomByteCount = 2
        encodingType = EncodingType.UTF16_LE
        confident = true
    } else if (data[0] === 0xEF && data[1] === 0xBB && data[2] === 0xBF) {
        bomByteCount = 3
        encodingType = EncodingType.UTF8
        confident = true
    } else if (data[0] === 0 && data[1] === 0 && data[2] === 0xFE && data[3] === 0xFF) {
        bomByteCount = 4
        encodingType = EncodingType.UTF32_BE
        confident = true
    } else if (data[0] === 0xFF && data[1] === 0xFE && data[2] === 0 && data[3] === 0) {
        bomByteCount = 4
        encodingType = EncodingType.UTF32_LE
        confident = true
    }

    // detect ASCII: printable characters plus TAB, LF and CR only
    if (!encodingType) {
        encodingType = EncodingType.ASCII
        confident = true
        for (let i = 0; i < data.length; i++) {
            const c = data[i]
            if ((c < 0x20 && c !== 0x09 && c !== 0x0A && c !== 0x0D) || c > 0x7E) {
                encodingType = undefined
                confident = false
                break
            }
        }
    }

    // detect UTF-32
    // A code point never exceeds U+10FFFF, so in every 4-byte group the most significant byte
    // is 0 and the next one is at most 0x10. Checking both bytes (not only the most significant
    // one) keeps BOM-less UTF-16 text such as 00 41 00 42 from being taken for UTF-32.
    if (!encodingType) {
        let be = true, le = true
        for (let i = 0; i < data.length; i++) {
            const c = data[i]
            switch (i % 4) {
                case 0: // most significant byte in big-endian order
                    if (c !== 0) {
                        be = false
                    }
                    break
                case 1: // second most significant byte in big-endian order
                    if (c > 0x10) {
                        be = false
                    }
                    break
                case 2: // second most significant byte in little-endian order
                    if (c > 0x10) {
                        le = false
                    }
                    break
                default: // most significant byte in little-endian order
                    if (c !== 0) {
                        le = false
                    }
            }
            if (!be && !le) {
                break
            }
        }
        if (be && !le) {
            encodingType = EncodingType.UTF32_BE
        } else if (!be && le) {
            encodingType = EncodingType.UTF32_LE
        }
    }

    // detect UTF-16
    if (!encodingType) {
        // be/le: the high byte of every 2-byte unit is zero in the respective byte order
        // mayBeBE/mayBeLE: a line break (LF or CR) was seen encoded in the respective byte order
        let be = true, le = true, firstByte: number | undefined = undefined, mayBeBE = false, mayBeLE = false
        for (let i = 0; i < data.length; i++) {
            const c = data[i]
            if (i % 2 === 0) {
                if (c !== 0) {
                    be = false
                }
                firstByte = data[i]
            } else {
                if (c !== 0) {
                    le = false
                }
                if (firstByte === 0 && (c === 0x0A || c === 0x0D)) {
                    mayBeBE = true
                } else if (c === 0 && (firstByte === 0x0A || firstByte === 0x0D)) {
                    mayBeLE = true
                }
            }
        }
        if ((be && !le) || (mayBeBE && !mayBeLE)) {
            encodingType = EncodingType.UTF16_BE
            confident = be && !le
        } else if ((!be && le) || (!mayBeBE && mayBeLE)) {
            encodingType = EncodingType.UTF16_LE
            confident = !be && le
        }
    }

    // detect UTF-8
    if (!encodingType) {
        if (isUTF8(data)) {
            encodingType = EncodingType.UTF8
            confident = true
        }
    }

    return {encodingType: encodingType ?? EncodingType.ASCII, bomByteCount, confident}
}

/**
 * One date/time notation that can be tried when parsing a text value.
 *
 * `format` is either a luxon format-token string (passed to `DateTime.fromFormat`) or one of
 * the special names "ISO", "HTTP", "RFC2822" and "ELAPSEDTIME", which `parse` dispatches to
 * dedicated parsers. `hasDate` / `hasTime` tell which parts the notation carries;
 * `elapsedTime` marks durations written as `h:mm[:ss[.fff]]`.
 */
class DateTimeFormat {
    static readonly ISO: DateTimeFormat = new DateTimeFormat("ISO", true, true)
    static readonly HTTP: DateTimeFormat = new DateTimeFormat("HTTP", true, true)
    static readonly RFC2822: DateTimeFormat = new DateTimeFormat("RFC2822", true, true)
    static readonly ELAPSEDTIME: DateTimeFormat = new DateTimeFormat("ELAPSEDTIME", false, true, true)

    /**
     * @throws Error when the format is declared to have neither a date nor a time part
     */
    constructor(readonly format: string, readonly hasDate: boolean, readonly hasTime: boolean, readonly elapsedTime = false) {
        if (!hasDate && !hasTime) {
            throw new Error(`"${format.toString()}" somehow has no time neither date`)
        }
    }

    /**
     * Parses `value` according to this format.
     *
     * @param value  text to parse
     * @param locale locale for luxon format-token strings; not used by the special formats
     * @param zone   time zone for luxon format-token strings that carry both date and time;
     *               date-only and time-only formats are parsed in UTC.
     *               NOTE(review): ISO/HTTP/RFC2822 ignore `locale` and `zone` — confirm this is intended.
     * @returns a luxon `DateTime`, invalid (`isValid === false`) when the text does not match
     */
    parse(value: string, locale: string, zone: string): DateTime {
        switch (this.format) {
            case "ISO":
                // require the date-T-time form so that bare dates or numbers are not accepted as ISO
                return /\dT\d/.test(value)
                    ? DateTime.fromISO(value)
                    : DateTime.invalid("doesn't match full ISO format");
            case "HTTP":
                return DateTime.fromHTTP(value)
            case "RFC2822":
                return DateTime.fromRFC2822(value)
            case "ELAPSEDTIME": {
                const t = /^(?<hr>-?\d+):(?<min>-?[0-5]\d?)(:(?<sec>-?[0-5]\d?(\.\d+)?))?$/.exec(value)
                if (t === null || !t.groups) {
                    return DateTime.invalid("doesn't match elapsed time format")
                }
                // The sign of the whole duration is the sign written in front of the hours.
                // It is read from the text because "-0" parses to a zero that carries no usable
                // sign, which would turn "-0:30" into plus thirty minutes.
                const sign = t.groups['hr'].startsWith('-') ? -1 : 1,
                    hours = U.parseIntNotNan(t.groups['hr']),
                    mins = U.parseIntNotNan(t.groups['min']) * sign,
                    secs = parseFloat(t.groups['sec'] ?? '0') * sign
                return DateTime.fromSeconds(hours * 3600 + mins * 60 + secs, {zone: 'UTC'})
            }
            default:
                return DateTime.fromFormat(value, this.format, {
                    locale,
                    zone: this.hasDate && this.hasTime ? zone : 'UTC'
                })
        }
    }
}

// Type verdict for a column while its values are being scanned. Exactly one of three states:
//  - a single parsed type (`ptype`) fits the values seen so far;
//  - `emptyValue`: only null values have been seen, the type is still unknown;
//  - `mixedTypes`: the values do not agree on one type.
type DetectedColumnType = {
    ptype: ParsedType,
    emptyValue: false,
    mixedTypes: false
} | {
    emptyValue: true,
    mixedTypes: false
} | {
    emptyValue: false,
    mixedTypes: true
}

// Tracks the single textual value that may stand for "no value" inside a number/date column.
//  - `found`: a text value has been seen in the column;
//  - `value`: that text, available only while the candidate is still valid;
//  - `invalid`: the candidate was discarded; `TypeDetector.updateColumnInfo` sets it when a
//    second, different text value appears or when a non-text value contradicts the column type.
type EmptyValueEquivalent = {
    value: string,
    found: true,
    invalid: false
} | {
    found: false,
    invalid: false
} | {
    found: true,
    invalid: true
} | {
    found: false,
    invalid: true
}

// Statistics accumulated per column by `TypeDetector.updateColumnInfo`.
type ColumnInfo = {
    detectedColumnType: DetectedColumnType
    // format and locale of the most recent successful date/time parse; tried first for later values
    dateTimeParser?: DateTimeFormat
    dateTimeLocale?: string
    // undefined: no non-null value seen yet; number: length shared by all non-null values so far;
    // null: lengths differ
    allValuesSameLength?: number | null
    // number of values detected as null
    nullValues: number
    // distinct non-null raw values; collection stops once the set has grown past
    // `Colors.seriesColorCount` (see `TypeDetector.addCategoryValue`)
    categoryValues: Set<string>
    // textual value considered as null-equivalent among number/time values
    emptyValueEquivalent: EmptyValueEquivalent
    // text found before / after the numeric part of number values (see `TypeDetector.parseNumber`)
    prefix?: string
    postfix?: string
}

export class TypeDetector {

    /**
     *  static fields
     */

    // matches a whole value equal to one of `nullValues`, ignoring case
    protected static readonly nullRexExp = new RegExp(`^(${nullValues.join('|')})$`, 'i')
    // Number notations tried in order by `parseNumber`. Each pattern captures the numeric part
    // as `value` and any non-digit text around it as `prefix` / `postfix`.
    protected static readonly numberRegExps = [
        /^(?<prefix>\D*?)(?<value>[+-]?0?\.\d+)(?<postfix>\D*)$/, // .123 | 0.1 | 00.1 | -0.1 | +0.1
        /^(?<prefix>\D*?)(?<value>[+-]?(\d{1,3}[,.\s](\d{3}[,.\s])*\d{3}|\d+)([,.]\d+)?)(?<postfix>\D*)$/, // -1 | 1 000 | 1,234 | 12 345,67 | 12,345.67
        /^(?<prefix>\D*?)(?<value>[+-]?[0-9]?([.,]\d+)?e([+-]?\d+))(?<postfix>\D*)$/, // 1e9 | -1.1e-9 | +.1e3
    ]
    // cache of decimal/group separators per locale, filled lazily by `getNumberSeparators`
    protected static readonly numberSeparators = new Map<string, { decimal: string, group: string }>()
    // Whitespace that `parseNumber` strips before matching: only runs between two digits, except on
    // Safari, where every whitespace run is matched instead.
    // this regex is presented as a string just to prevent Safari from throwing an error due to usage of lookbehind
    protected static readonly rxSpaces = new RegExp(isSafari ? '\\s+' : '(?<=\\d)\\s+(?=\\d)', 'g')

    // Date/time notations tried by `parseDateTime`, in this order; the first format that yields
    // a valid result wins, so the order is significant. The list is grouped by the flags passed
    // to `DateTimeFormat`: date + time first, then time-only, then date-only.
    // Format tokens: https://moment.github.io/luxon/#/parsing
    public static readonly dateTimeFormats: DateTimeFormat[] = [
        // date + time
        DateTimeFormat.ISO,
        DateTimeFormat.HTTP,
        DateTimeFormat.RFC2822,
        new DateTimeFormat('yyyy-MM-dd HH:mm', true, true),
        new DateTimeFormat('yyyy-MM-dd HH:mm:ss', true, true),
        new DateTimeFormat('yyyy-MM-dd HH:mm:ss.S', true, true),
        new DateTimeFormat('yyyy-MM-dd HH:mm:ss Z', true, true),
        new DateTimeFormat('yyyy-MM-dd HH:mm:ss ZZ', true, true),
        new DateTimeFormat('yyyy-MM-dd HH:mm:ss ZZZ', true, true),
        new DateTimeFormat('yyyy-MM-dd HH:mm:ss z', true, true),
        new DateTimeFormat('yyyy-MM-dd HH:mm:ss.S Z', true, true),
        new DateTimeFormat('yyyy-MM-dd HH:mm:ss.S ZZ', true, true),
        new DateTimeFormat('yyyy-MM-dd HH:mm:ss.S ZZZ', true, true),
        new DateTimeFormat('yyyy-MM-dd HH:mm:ss.S z', true, true),
        new DateTimeFormat('f', true, true),
        new DateTimeFormat('F', true, true),
        new DateTimeFormat('ff', true, true),
        new DateTimeFormat('FF', true, true),
        new DateTimeFormat('D HH:mm', true, true),
        new DateTimeFormat('D HH:mm:ss', true, true),
        new DateTimeFormat('D HH:mm:ss.S', true, true),
        new DateTimeFormat('d MMMM yyyy HH:mm:ss', true, true),

        // time only
        DateTimeFormat.ELAPSEDTIME,
        new DateTimeFormat('T', false, true),
        new DateTimeFormat('TT', false, true),
        new DateTimeFormat('t', false, true),
        new DateTimeFormat('tt', false, true),

        // date only
        new DateTimeFormat('D', true, false),
        new DateTimeFormat('DD', true, false),
        new DateTimeFormat('DDD', true, false),
        new DateTimeFormat('DDDD', true, false),
        new DateTimeFormat('yyyy-MM-dd', true, false),
        new DateTimeFormat('MM/dd/yyyy', true, false),
        new DateTimeFormat('MM/dd/yy', true, false),
        new DateTimeFormat('dd.MM.yy', true, false),
        new DateTimeFormat('dd.MM.yyyy', true, false),
        new DateTimeFormat('dd MMM yyyy', true, false),
        new DateTimeFormat('dd-MMM-yy', true, false),
        new DateTimeFormat('d MMMM yyyy', true, false),
    ]

    /**
     *  static methods
     */

    /**
     * Tells whether a raw value stands for "no value": blank text or one of `nullValues`
     * (case-insensitive). Surrounding whitespace is ignored in both checks, so " null " is
     * recognised just like "null".
     */
    protected static parseNull(text: string): boolean {
        const trimmed = text.trim()
        return trimmed.length === 0 || TypeDetector.nullRexExp.test(trimmed)
    }

    /**
     * Parses a number, optionally surrounded by non-digit text such as a currency sign or a unit.
     *
     * Whitespace matched by `rxSpaces` is removed first. Plain decimal notation ("12", "-3.5")
     * is converted directly. Otherwise the text is matched against `numberRegExps` and the
     * numeric part is normalised with the decimal/group separators of each locale in turn,
     * ending with 'en-US'; the first finite result wins.
     * A postfix of exactly "K" or "M" multiplies the value by a thousand or a million and is
     * not reported as a postfix.
     *
     * @param text       raw value
     * @param useLocales locales whose separators are tried, in order of preference
     * @returns the number with the trimmed prefix/postfix (each omitted when empty),
     *          or `undefined` when the text is not a number
     */
    protected static parseNumber(text: string, useLocales: string[]): { value: number, prefix?: string, postfix?: string } | undefined {
        text = text.replace(TypeDetector.rxSpaces, '')

        // fast path: plain integer or decimal with a dot
        if (/^[-+]?\d+(\.\d*)?$/.test(text)) {
            return {value: Number(text)}
        }

        // 'en-US' is the last resort; a locale listed twice would only repeat the same attempt
        const locales = Array.from(new Set([...useLocales, 'en-US']))

        for (const rx of TypeDetector.numberRegExps) {
            const groups = rx.exec(text)?.groups
            if (groups === undefined) {
                continue
            }
            const rawValue = groups['value']

            for (const locale of locales) {
                const {decimal, group} = TypeDetector.getNumberSeparators(locale)
                // split/join replaces every occurrence literally: no regular expression has to be
                // built per value and an empty separator is simply skipped
                const withoutGroups = group ? rawValue.split(group).join('') : rawValue
                let normalizedValue = Number(decimal ? withoutGroups.split(decimal).join('.') : withoutGroups)
                if (!isFinite(normalizedValue)) {
                    continue
                }

                const prefix = groups['prefix']?.trim()
                // trim before testing for a multiplier so that "5 K" and "5K" are treated alike
                let postfix = groups['postfix']?.trim()
                if (postfix === 'K') {
                    normalizedValue *= 1_000
                    postfix = ''
                } else if (postfix === 'M') {
                    normalizedValue *= 1_000_000
                    postfix = ''
                }
                return {
                    value: normalizedValue,
                    prefix: prefix ? prefix : undefined,
                    postfix: postfix ? postfix : undefined
                }
            }
        }
        return undefined
    }

    /**
     * Parses a date, a time or a date-time by trying every known format for every locale.
     *
     * Locales are tried in the given order followed by 'en-US'; for each locale the hinted
     * format (if any) is tried first and then `dateTimeFormats` in declaration order.
     * The first valid parse wins.
     *
     * @param text        raw value
     * @param useLocales  locales to try, in order of preference
     * @param useTimeZone time zone handed to the format's parser
     * @param useFormat   format that matched earlier values of the same column, tried first
     * @returns `undefined` when nothing matches, otherwise the parsed type, the format and
     *          locale that matched, and a value in milliseconds:
     *          date-time and elapsed time — milliseconds since the epoch;
     *          time of day — that value modulo one day;
     *          date — rounded to a whole number of days
     */
    static parseDateTime(text: string, useLocales: string[], useTimeZone: string, useFormat?: DateTimeFormat): { ptype: ParsedType, value: number, dateTimeParser: DateTimeFormat | undefined, dateTimeLocale: string | undefined } | undefined {
        const msPerDay = 86400000

        // the hinted format goes first and is not retried when it comes up again in the list
        const formats = useFormat
            ? [useFormat, ...TypeDetector.dateTimeFormats.filter(f => f !== useFormat)]
            : TypeDetector.dateTimeFormats
        // a locale listed twice (e.g. the user's locale being 'en-US') would only repeat
        // the same failed attempts for every format
        const locales = Array.from(new Set([...useLocales, 'en-US']))

        for (const locale of locales) {
            for (const format of formats) {
                const dt = format.parse(text, locale, useTimeZone)
                if (!dt.isValid) {
                    continue
                }

                let ptype: ParsedType, value: number
                if (format.hasTime) {
                    const ms = dt.toSeconds() * 1000
                    ptype = format.hasDate ? ParsedType.DateTime : ParsedType.Time
                    // a plain time of day keeps only the part within one day
                    value = format.hasDate || format.elapsedTime ? ms : ms % msPerDay
                } else {
                    ptype = ParsedType.Date
                    value = Math.round(dt.toSeconds() / 86400) * msPerDay
                }
                return {ptype, value, dateTimeParser: format, dateTimeLocale: locale}
            }
        }
        return undefined
    }

    /**
     * Converts one raw value according to the semantic type of its column.
     *
     * Null-like text becomes `null`. For numeric types the parsed number is returned, for
     * date/time types the parsed timestamp together with the format and locale that matched;
     * in both cases an unparsable value becomes `null`. For any other type the text is
     * returned unchanged.
     */
    static parseValue(value: string, stype: SemanticType, useLocales: string[], useTimeZone: string, useDateTimeParser?: DateTimeFormat)
        : { value: DataValue, dateTimeParser?: DateTimeFormat, dateTimeLocale?: string } {
        if (TypeDetector.parseNull(value)) {
            return {value: null}
        }

        if (STT.isNumeric(stype)) {
            const parsedNumber = TypeDetector.parseNumber(value, useLocales)
            return {value: parsedNumber?.value ?? null}
        }

        if (!STT.isDateOrTime(stype)) {
            return {value}
        }

        const parsedDate = TypeDetector.parseDateTime(value, useLocales, useTimeZone, useDateTimeParser)
        return {
            value: parsedDate === undefined ? null : parsedDate.value,
            dateTimeParser: parsedDate?.dateTimeParser,
            dateTimeLocale: parsedDate?.dateTimeLocale
        }
    }

    /**
     * Determines the parsed type of a single raw value.
     *
     * Null is checked first, then number, then date/time; anything else is text.
     * When `shouldBeOfType` is given, only the parser for that type is attempted and a
     * date/time result counts only if it is exactly of that type; otherwise the value is text.
     *
     * @returns the type plus, for numbers, the prefix/postfix and, for dates/times,
     *          the format and locale that matched
     */
    protected static detectValueType(value: string, useLocales: string[], useTimeZone: string, useDateTimeParser?: DateTimeFormat, shouldBeOfType?: ParsedType)
        : { ptype: ParsedType, dateTimeParser?: DateTimeFormat, dateTimeLocale?: string, prefix?: string, postfix?: string } {

        if (TypeDetector.parseNull(value)) {
            return {ptype: ParsedType.Null}
        }

        const anyTypeAllowed = shouldBeOfType === undefined

        if (anyTypeAllowed || shouldBeOfType === ParsedType.Number) {
            const parsedNumber = TypeDetector.parseNumber(value, useLocales)
            if (parsedNumber !== undefined) {
                const {prefix, postfix} = parsedNumber
                return {ptype: ParsedType.Number, prefix: prefix, postfix: postfix}
            }
        }

        const temporalExpected = shouldBeOfType === ParsedType.Date
            || shouldBeOfType === ParsedType.Time
            || shouldBeOfType === ParsedType.DateTime
        if (anyTypeAllowed || temporalExpected) {
            const parsedDate = TypeDetector.parseDateTime(value, useLocales, useTimeZone, useDateTimeParser)
            const typeAccepted = parsedDate !== undefined && (anyTypeAllowed || shouldBeOfType === parsedDate.ptype)
            if (parsedDate !== undefined && typeAccepted) {
                return {
                    ptype: parsedDate.ptype,
                    dateTimeParser: parsedDate.dateTimeParser,
                    dateTimeLocale: parsedDate.dateTimeLocale
                }
            }
        }

        return {ptype: ParsedType.Text}
    }

    /**
     * Returns the decimal and grouping separators of a locale, as produced by
     * `Intl.NumberFormat` for the sample number 1000.1. A separator the locale does not use
     * is reported as an empty string. Results are cached per locale in `numberSeparators`.
     */
    protected static getNumberSeparators(locale: string): { decimal: string, group: string } {
        const known = TypeDetector.numberSeparators.get(locale)
        if (known) {
            return known
        }

        const sampleParts = new Intl.NumberFormat(locale).formatToParts(1000.1)
        const separatorOf = (kind: 'decimal' | 'group'): string =>
            sampleParts.find(part => part.type === kind)?.value ?? ''

        const separators = {decimal: separatorOf('decimal'), group: separatorOf('group')}
        TypeDetector.numberSeparators.set(locale, separators)
        return separators
    }

    /**
     * Folds one row of raw values into the per-column statistics.
     *
     * For every value of the row the matching `ColumnInfo` entry is created on demand and then
     * updated in place: category values, cached date/time format and locale, the
     * empty-value-equivalent candidate, the same-length tracker, the null counter and finally
     * the detected column type. `columnInfo` is mutated; nothing is returned.
     *
     * @param columnInfo per-column statistics, indexed by column
     * @param row        raw values of one row
     * @param locale     locale used for number and date detection
     * @param timezone   time zone used for date detection
     */
    protected static updateColumnInfo(columnInfo: ColumnInfo[], row: string[], locale: string, timezone: string) {
        for (let col = 0; col < row.length; col++) {
            // a row may be longer than any row seen before: create the missing entry from this value
            const value = row[col],
                ci = (columnInfo[col] = columnInfo[col] ?? this.initialColumnInfo(TypeDetector.detectValueType(value, [locale], timezone), value))

            this.addCategoryValue(ci.categoryValues, value)

            // Type detection is skipped once the column is settled: either its types are already
            // mixed, or it is a text column whose empty-value-equivalent candidate was discarded.
            if (!ci.detectedColumnType.mixedTypes && (ci.detectedColumnType.emptyValue || ci.detectedColumnType.ptype !== ParsedType.Text || !ci.emptyValueEquivalent.invalid)) {
                const result = TypeDetector.detectValueType(
                    value,
                    // try the locale that matched earlier date/time values first
                    ci.dateTimeLocale ? [ci.dateTimeLocale, locale] : [locale],
                    timezone,
                    ci.dateTimeParser,
                    // last argument: the type the value is expected to have, if any
                    ci.detectedColumnType.emptyValue
                        ? undefined // still don't know the alleged column type
                        : (ci.detectedColumnType.mixedTypes
                            ? ParsedType.Text // not interested in value type detection since the types are already mixed
                            : ci.emptyValueEquivalent.invalid
                                ? ci.detectedColumnType.ptype // expect the same column type as detected previously
                                : undefined // still allow some number to appear after a single non-text value
                        )
                )
                // remember the format and locale of the latest successful date/time parse
                ci.dateTimeLocale = result.dateTimeLocale ?? ci.dateTimeLocale
                ci.dateTimeParser = result.dateTimeParser ?? ci.dateTimeParser

                // update empty-value-equivalent for this column
                if (result.ptype !== ParsedType.Null && !ci.emptyValueEquivalent.invalid) {
                    if (result.ptype === ParsedType.Text) {
                        // a second, different text value discards the candidate; otherwise record it
                        ci.emptyValueEquivalent = ci.emptyValueEquivalent.found && ci.emptyValueEquivalent.value !== value
                            ? {found: true, invalid: true}
                            : {value, found: true, invalid: false}
                    } else if (!ci.detectedColumnType.emptyValue
                        && !ci.detectedColumnType.mixedTypes
                        && ci.detectedColumnType.ptype !== ParsedType.Text
                        && result.ptype !== ci.detectedColumnType.ptype) {
                        // a non-text value of another type than the column's non-text type discards the candidate
                        ci.emptyValueEquivalent = {found: ci.emptyValueEquivalent.found, invalid: true}
                    }
                }

                if (result.ptype !== ParsedType.Null) {
                    // detect if all the values are of the same length
                    if (ci.allValuesSameLength === undefined) {
                        ci.allValuesSameLength = value.length
                    } else if (ci.allValuesSameLength !== null && ci.allValuesSameLength !== value.length) {
                        ci.allValuesSameLength = null
                    }
                } else {
                    ci.nullValues = (ci.nullValues ?? 0) + 1
                }

                // correct detected column type if it's empty or differs from the newly detected type
                if (ci.detectedColumnType.emptyValue || ci.detectedColumnType.ptype !== result.ptype || ci.prefix !== result.prefix || ci.postfix !== result.postfix) {
                    if (ci.detectedColumnType.emptyValue) {
                        // first non-null value decides the type; a null value leaves the column empty
                        ci.detectedColumnType = result.ptype === ParsedType.Null
                            ? {emptyValue: true, mixedTypes: false}
                            : {ptype: result.ptype, emptyValue: false, mixedTypes: false}
                    } else if (ci.prefix !== result.prefix || ci.postfix !== result.postfix) {
                        // affixes disagree: adopt the new affix only where the column had none
                        // and mark the column as mixed
                        // NOTE(review): a Null or Text result carries no affixes, so a null cell in a
                        // column that already has a prefix/postfix also takes this branch and marks
                        // the column as mixed — confirm this is intended.
                        if (ci.prefix === undefined) {
                            ci.prefix = result.prefix
                        }
                        if (ci.postfix === undefined) {
                            ci.postfix = result.postfix
                        }
                        ci.detectedColumnType = {emptyValue: false, mixedTypes: true}
                    } else if (result.ptype !== ParsedType.Null) {
                        // Types disagree. With a discarded candidate the column is mixed. Otherwise the
                        // column keeps its non-text type (the text is the null-equivalent candidate),
                        // or, if it was text so far, switches to the newly detected type.
                        ci.detectedColumnType = ci.emptyValueEquivalent.invalid
                            ? {emptyValue: false, mixedTypes: true}
                            : {ptype: ci.detectedColumnType.ptype === ParsedType.Text ? result.ptype : ci.detectedColumnType.ptype, emptyValue: false, mixedTypes: false}
                    }
                }
            }
        }
    }

    /**
     * Converts raw rows into typed values, column by column.
     *
     * Every output row covers the width of the widest input row; cells missing in a shorter
     * row become `null`. Columns whose type is Skip are left out of the output rows, and
     * columns without a listed type are treated as text. The date/time format and locale
     * that matched in a column are remembered and tried first for the following rows.
     *
     * @param rows         raw values, one array per row
     * @param columnTypes  semantic type per column index
     * @param userLocale   locale used for number and date parsing
     * @param userTimeZone time zone used for date parsing
     * @returns one array of parsed values per input row
     */
    static parseData(rows: string[][], columnTypes: SemanticType[], userLocale: string, userTimeZone: string): DataValue[][] {
        // Width of the widest row. A plain loop is used because spreading all row lengths into
        // Math.max passes one argument per row and can exceed the engine's argument limit.
        let columnCount = 0
        for (const row of rows) {
            if (row.length > columnCount) {
                columnCount = row.length
            }
        }

        const dateTimeParser: (DateTimeFormat | undefined)[] = [],
            dateTimeLocale: (string | undefined)[] = [],
            data: DataValue[][] = []

        for (const row of rows) {
            const dataRow: DataValue[] = []
            for (let c = 0; c < columnCount; c++) {
                const stype = columnTypes[c] ?? SemanticType.Text
                if (STT.isSkip(stype)) {
                    continue
                }

                const value = row[c]
                if (value === undefined) {
                    // the row is shorter than the widest one
                    dataRow.push(null)
                    continue
                }

                const dtLocale = dateTimeLocale[c],
                    useLocales = dtLocale ? [dtLocale, userLocale] : [userLocale],
                    parsingResult = TypeDetector.parseValue(value, stype, useLocales, userTimeZone, dateTimeParser[c])
                dataRow.push(parsingResult.value)
                dateTimeLocale[c] = parsingResult.dateTimeLocale ?? dateTimeLocale[c]
                dateTimeParser[c] = parsingResult.dateTimeParser ?? dateTimeParser[c]
            }
            data.push(dataRow)
        }
        return data
    }

    /**
     *  protected fields
     */

    // number of rows passed to `processRow` so far
    protected rowIndex = 0
    // copy of the first processed row
    protected firstRowValues: string[] | undefined
    // detected type of every value of the first processed row
    protected firstRowTypes: { ptype: ParsedType, prefix?: string, postfix?: string }[] | undefined
    // detected types of the first row treated as data; equal to `firstRowTypes` until that row arrives
    protected firstValueRowTypes: { ptype: ParsedType, prefix?: string, postfix?: string }[] | undefined
    // index of the first row treated as data; set while the first row is processed
    protected firstValueRowIndex: number | undefined
    // per-column statistics, (re)initialised on the first row and on the first data row
    protected columnInfo: ColumnInfo[] | undefined

    /**
     *  protected static methods
     */

    /**
     * Adds a raw value to a column's set of distinct values and returns the same set.
     * Null-like values are ignored, and nothing is added once the set holds more than
     * `Colors.seriesColorCount` entries.
     */
    protected static addCategoryValue(set: Set<string>, value: string): Set<string> {
        const acceptsMore = set.size <= Colors.seriesColorCount
        if (!acceptsMore || TypeDetector.parseNull(value)) {
            return set
        }
        return set.add(value)
    }

    /**
     * Builds the starting statistics of a column from its first value.
     *
     * @param type  detected type (and number affixes) of the first value
     * @param value the raw first value
     * @returns a fresh `ColumnInfo`: empty when the value is null, typed otherwise; a text
     *          value is recorded as the empty-value-equivalent candidate
     */
    protected static initialColumnInfo(type: { ptype: ParsedType, prefix?: string, postfix?: string }, value: string): ColumnInfo {
        const {ptype, prefix, postfix} = type

        let detectedColumnType: DetectedColumnType
        if (ptype === ParsedType.Null) {
            detectedColumnType = {emptyValue: true, mixedTypes: false}
        } else {
            detectedColumnType = {ptype: ptype, emptyValue: false, mixedTypes: false}
        }

        // only a text value can serve as the null-equivalent candidate
        let emptyValueEquivalent: EmptyValueEquivalent
        if (ptype === ParsedType.Text) {
            emptyValueEquivalent = {value, found: true, invalid: false}
        } else {
            emptyValueEquivalent = {found: false, invalid: false}
        }

        return {
            detectedColumnType,
            prefix,
            postfix,
            emptyValueEquivalent,
            nullValues: 0,
            categoryValues: TypeDetector.addCategoryValue(new Set(), value)
        }
    }

    /**
     *  public methods
     */

    /**
     * @param useLocale   locale passed to number and date/time detection
     * @param useTimezone time zone passed to date/time detection
     * @param headerRows  number of header rows when known; when omitted, `processRow` assumes
     *                    one header row if the first row has more than one value (none otherwise)
     *                    and `getColumnParams` later assigns a guessed value to this field
     */
    constructor(protected useLocale: string, protected useTimezone: string, protected headerRows?: number) {
    }

    /**
     * Feeds the next row of raw values into the detector.
     *
     * The first row is remembered separately (it may turn out to be a header) and fixes the
     * index of the first data row: `headerRows` when given, otherwise 1 if the row has more
     * than one value and 0 if not. Column statistics are initialised from the first row and
     * re-initialised from the first data row; every row from the first data row on is then
     * folded into them.
     */
    processRow(row: string[]) {
        const detectTypes = (values: string[]) =>
            values.map(v => TypeDetector.detectValueType(v, [this.useLocale], this.useTimezone))
        const isFirstRow = this.rowIndex === 0

        if (isFirstRow) {
            const firstRow = [...row]
            this.firstRowValues = firstRow
            this.firstRowTypes = detectTypes(firstRow)
            this.firstValueRowIndex = this.headerRows ?? (row.length > 1 ? 1 : 0)
            this.firstValueRowTypes = this.firstRowTypes
        }

        const isFirstValueRow = this.rowIndex === this.firstValueRowIndex
        if (isFirstValueRow) {
            this.firstValueRowTypes = detectTypes(row)
        }

        if (isFirstRow || isFirstValueRow) {
            const startingTypes = U.def(this.firstValueRowTypes)
            this.columnInfo = startingTypes.map((type, columnIndex) => TypeDetector.initialColumnInfo(type, row[columnIndex]))
        }

        const firstValueRowIndex = U.def(this.firstValueRowIndex)
        if (this.rowIndex >= firstValueRowIndex) {
            TypeDetector.updateColumnInfo(U.def(this.columnInfo), row, this.useLocale, this.useTimezone)
        }

        this.rowIndex += 1
    }

    /**
     * Summarises what was learned from the processed rows into column parameters: the guessed number
     * of header rows, a recommended type plus a list of possible types for every column, and the
     * prefix/postfix of numeric columns.
     *
     * Not a pure getter: the guessed header row count is stored in `this.headerRows`, and when no
     * header is detected the first row is fed into the column statistics.
     *
     * @returns the guessed column parameters; `manualHeaderRows` and every `manual` type are left undefined
     * @throws Error if no row has been processed yet
     */
    getColumnParams(): ColumnParams {
        if (this.rowIndex === 0) {
            throw Error('You should process at least one row before getting column params')
        }

        const columnInfo = U.def(this.columnInfo),
            firstRowTypes = U.def(this.firstRowTypes)

        // augment columnInfo by additional columns in the first row
        // (such columns are registered as empty; every row but the first counts as a null value)
        while (firstRowTypes.length > columnInfo.length) {
            columnInfo.push({
                detectedColumnType: {emptyValue: true, mixedTypes: false},
                nullValues: this.rowIndex - 1,
                emptyValueEquivalent: {found: false, invalid: false},
                categoryValues: new Set()
            })
        }

        // returns indices of columns having at least one value
        // (i.e. columns whose set of category values is not empty)
        const getIndicesOfColumnsWithValues = (columns: ColumnInfo[]) => [...columns
            .map((ci, i) => ci.categoryValues.size ? i : undefined)
            .filter(x => x !== undefined)
        ] as number[]

        const indicesOfColumnsWithValues = new Set(getIndicesOfColumnsWithValues(columnInfo)),
            // true if the value is the valid "empty value equivalent" found for the given column
            valueIsEqualToNull = (value:string, columnIndex:number) => {
                const eve = columnInfo[columnIndex]?.emptyValueEquivalent
                return eve !== undefined && eve.found && !eve.invalid && eve.value === value
            }

        // detect header rows (only if the count was not preset): the first row is taken for a header when
        //  - it has at least as many cells as there are columns,
        //  - none of its cells is Null in a column that has values, and
        //  - in at least one column with values and a single definite type its cell differs from the column,
        //    either by parsed type (unless the cell equals the column's empty-value equivalent) or by prefix/postfix
        this.headerRows = this.headerRows ?? (
            firstRowTypes.length >= columnInfo.length
            && firstRowTypes.every((t, i) => t.ptype !== ParsedType.Null || !indicesOfColumnsWithValues.has(i))
            && firstRowTypes.filter((t, i) => {
                const columnType = columnInfo[i]?.detectedColumnType ?? {emptyValue: true, mixedTypes: false}
                return indicesOfColumnsWithValues.has(i)
                    && !columnType.emptyValue
                    && !columnType.mixedTypes
                    && (
                        (t.ptype !== columnType.ptype && (this.firstRowValues === undefined || !valueIsEqualToNull(this.firstRowValues[i], i)))
                        || t.prefix !== columnInfo[i].prefix
                        || t.postfix !== columnInfo[i].postfix
                    )
            }).length
                ? 1
                : 0
        )

        // take first-row types into account if the header has gone
        // NOTE(review): when firstValueRowIndex is 0 the first row has already been passed to
        // updateColumnInfo by processRow, so it appears to be counted twice here — confirm this is harmless
        if (this.headerRows === 0) {
            TypeDetector.updateColumnInfo(U.def(this.columnInfo), U.def(this.firstRowValues), this.useLocale, this.useTimezone)
        }

        // make all columns with unknown types Textual
        // (a column that is empty or has mixed types becomes Text; otherwise its parsed type is converted)
        const columnTypes: { stype: SemanticType, possibleTypes: SemanticType[] }[] = columnInfo.map(cp => {
            const stype = !cp.detectedColumnType.emptyValue && !cp.detectedColumnType.mixedTypes ? STT.semanticTypeFromParsedType(cp.detectedColumnType.ptype) : SemanticType.Text
            return {stype, possibleTypes: STT.getPossibleAlternatives(stype)}
        })

        // registers Category as a possible type of the column and, if makeSuch is true, as its recommended type too
        const mayWellBeCategorical = (columnIndex: number, makeSuch = true) => {
            const categoryType = SemanticType.Category
            if (makeSuch) {
                columnTypes[columnIndex].stype = categoryType
            }
            if (columnTypes[columnIndex].possibleTypes.indexOf(categoryType) < 0) {
                columnTypes[columnIndex].possibleTypes.push(categoryType)
            }
        }

        // take first-row values into account if the header has gone
        if (this.headerRows === 0) {
            columnInfo.forEach((ci, i) => {

                // add first row values into list of category values if they appeared to be not a part of a header
                TypeDetector.addCategoryValue(ci.categoryValues, U.def(this.firstRowValues)[i] ?? '')

                // reset column type to Text if affixes of the first row don't match the affixes of the column
                // NOTE(review): the types compared here come from firstValueRowTypes, which processRow
                // overwrites with the types of the first value row — confirm firstRowTypes was not intended
                if (U.mustNotBeNullNorUndefined(this.firstValueRowTypes)) {
                    const type = this.firstValueRowTypes[i]
                    if (type && (type.prefix !== ci.prefix || type.postfix !== ci.postfix))
                        columnTypes[i].stype = SemanticType.Text
                }
            })

            // recalculate some stats
            // (the first-row values added above may have given values to columns that had none)
            indicesOfColumnsWithValues.clear()
            getIndicesOfColumnsWithValues(columnInfo).forEach(i => indicesOfColumnsWithValues.add(i))
        }

        // reset column to Text type if Number is too long and of the same length (i.e. looks like an identifier, code, etc.)
        for (let i = 0; i < columnTypes.length; i++) {
            if (STT.isNumeric(columnTypes[i].stype) && (columnInfo[i].allValuesSameLength ?? 0) > 11) {
                columnTypes[i] = {
                    stype: SemanticType.Text,
                    possibleTypes: STT.getPossibleAlternatives(SemanticType.Text)
                }
            }
        }

        // skip columns with empty values
        // (every column that has no values at all is recommended to be skipped)
        [...columnTypes.keys()]
            .filter(col => !indicesOfColumnsWithValues.has(col))
            .forEach(col => columnTypes[col].stype = SemanticType.Skip);

        // make text columns categorical if they are suitable
        // (candidates are columns whose number of distinct values does not exceed Colors.seriesColorCount)
        [...indicesOfColumnsWithValues.values()]
            .filter(col => columnInfo[col].categoryValues.size > 0 && columnInfo[col].categoryValues.size <= Colors.seriesColorCount)
            .forEach(col => {
                return mayWellBeCategorical(col,
                    // only a textual column with at least one repeated value is made (recommended) categorical automatically
                    // (a column with exactly two distinct values is recommended as categorical regardless of its type;
                    // the repetition test compares the distinct count with processed rows minus first-value-row index
                    // minus nullValues — presumably the number of non-null value cells, verify against updateColumnInfo)
                    columnInfo[col].categoryValues.size === 2
                    || (
                        STT.isTextual(columnTypes[col].stype)
                        && columnInfo[col].categoryValues.size < (this.rowIndex - U.def(this.firstValueRowIndex) - (columnInfo[col].nullValues ?? 0))
                    ))
            })

        // build column affixes map
        // (only numeric columns that have a prefix or a postfix get an entry)
        const columnAffixes: Map<number, { prefix: string | undefined, postfix: string | undefined }> = new Map()
        columnTypes.forEach((type, index) => {
            if (STT.isNumeric(type.stype) && (columnInfo[index].prefix || columnInfo[index].postfix)) {
                columnAffixes.set(index, {prefix: columnInfo[index].prefix, postfix: columnInfo[index].postfix})
            }
        })

        return {
            guessedHeaderRows: this.headerRows,
            manualHeaderRows: undefined,
            columnTypes: columnTypes.map(t => ({
                possible: t.possibleTypes,
                recommended: t.stype,
                manual: undefined
            })),
            columnAffixes
        }
    }
}

/**
 * Guesses the parameters needed to import a delimited text file.
 *
 * The encoding comes from `detectEncodingType`, with a locale-specific code page substituted when
 * the locale has an entry in `localeToEncoding`. Combinations of value separator, text qualifier
 * and escape character are then tried in a fixed order; the first one that parses the sample
 * (up to 50 characters per value, stopping at the first dubious row) into more than one
 * non-dubious row wins, and the column parameters are taken from the rows it produced.
 *
 * @param data raw file content, possibly starting with a BOM
 * @param useLocale locale used for value type detection and for choosing the fallback code page
 * @param useTimeZone time zone used for value type detection
 * @returns the guessed parameters; if nothing worked (or the data is empty), defaults built from
 *     the first candidate of each list, with no columns and `confident: false`
 */
export function guessImportParameters(data: DataWithBOM, useLocale = U.settings.locale, useTimeZone = U.settings.timeZone): ImportParams {
    const detectedEncoding = detectEncodingType(data)
    const bomByteCount = detectedEncoding.bomByteCount
    const dataWithoutBOM = data.subarray(bomByteCount)
    const localeMatch = localeToEncoding.find(([localePrefix]) => useLocale.startsWith(localePrefix))
    const encoding = encodingFromEncodingType(detectedEncoding.encodingType, localeMatch ? localeMatch[1] : undefined)
    const lineSeparator = UniversalEOL
    const valueSeparators = [',', ';', '\t', ':', '|', '~']
    const textQualifiers = ['"', "'"]
    const escapeChars = ['"', '\\']

    // tries every combination in order and returns the parameters of the first one that works
    const findWorkingParams = (): ImportParams | null => {
        for (const valueSeparator of valueSeparators) {
            for (const textQualifier of textQualifiers) {
                for (const escapeChar of escapeChars) {
                    const parsingParams = {encoding, lineSeparator, valueSeparator, textQualifier, escapeChar}
                    const typeDetector = new TypeDetector(useLocale, useTimeZone)
                    const result = extractRows(
                        dataWithoutBOM, parsingParams, undefined, undefined, 50, true,
                        row => typeDetector.processRow(row)
                    )

                    if (!result.dubious && result.rowCount > 1) {
                        const columnParams = typeDetector.getColumnParams()
                        return {
                            ...parsingParams,
                            ...columnParams,
                            bomByteCount,
                            confident: detectedEncoding.confident && columnParams.columnTypes.length > 0
                        }
                    }
                }
            }
        }
        return null
    }

    const guessed = data.length > 0 ? findWorkingParams() : null
    if (guessed !== null) {
        return guessed
    }

    return {
        encoding,
        lineSeparator,
        bomByteCount,
        valueSeparator: valueSeparators[0],
        textQualifier: textQualifiers[0],
        escapeChar: escapeChars[0],
        guessedHeaderRows: 0,
        manualHeaderRows: undefined,
        columnTypes: [],
        columnAffixes: new Map(),
        confident: false
    }
}

/**
 * Maps an encoding name to the encoding type that `getChar` decodes by.
 *
 * @param encoding encoding name
 * @returns the matching Unicode encoding type for the UTF-8/16/32 names; `EncodingType.ASCII`
 *     for every other encoding
 */
function encodingTypeFromEncoding(encoding: Encoding): EncodingType {
    const unicodeEncodingTypes = new Map<Encoding, EncodingType>([
        ["utf-8", EncodingType.UTF8],
        ["utf-16be", EncodingType.UTF16_BE],
        ["utf-16le", EncodingType.UTF16_LE],
        ["utf-32be", EncodingType.UTF32_BE],
        ["utf-32le", EncodingType.UTF32_LE],
    ])

    return unicodeEncodingTypes.get(encoding) ?? EncodingType.ASCII
}

/**
 * Maps an encoding type back to an encoding name.
 *
 * @param encodingType encoding type to convert
 * @param asciiEncoding encoding name to return for `EncodingType.ASCII`; defaults to "windows-1252"
 * @returns the encoding name for the given type
 */
function encodingFromEncodingType(encodingType: EncodingType, asciiEncoding: Encoding = "windows-1252"): Encoding {
    const encodingByType: Record<EncodingType, Encoding> = {
        [EncodingType.ASCII]: asciiEncoding,
        [EncodingType.UTF8]: "utf-8",
        [EncodingType.UTF16_BE]: "utf-16be",
        [EncodingType.UTF16_LE]: "utf-16le",
        [EncodingType.UTF32_BE]: "utf-32be",
        [EncodingType.UTF32_LE]: "utf-32le",
    }

    return encodingByType[encodingType]
}

/**
 * Decodes the single character of `data` that starts at byte offset `pos`.
 *
 * - `ASCII`: one byte, translated through `charTable` when one is given, otherwise taken as a char code.
 * - `UTF8`: a 1–4 byte sequence. The lead byte and every continuation byte are validated; a malformed
 *   sequence (a byte that cannot start a sequence, or a missing/invalid continuation byte) yields `'?'`
 *   and consumes only the offending lead byte, so the bytes after it — separators and line ends
 *   included — are decoded on their own instead of being swallowed.
 * - `UTF16_*` / `UTF32_*`: one 2-byte or 4-byte unit in the given byte order. UTF-16 surrogates are
 *   returned as separate one-unit strings.
 *
 * A code that is not a valid code point also yields `'?'`.
 *
 * @param data bytes to decode (without BOM)
 * @param pos byte offset of the character
 * @param encodingType how the bytes are to be decoded
 * @param charTable optional byte-value-to-character table, used for `ASCII` only
 * @returns the decoded character and the byte offset of the character that follows it
 */
function getChar(data: DataWithoutBOM, pos: number, encodingType: EncodingType, charTable?: string[]): { char: string, nextPos: number } {
    const invalidChar = '?'
    let char: string | null = null, code = 0, byteLen = 0
    const d0 = data[pos]

    switch (encodingType) {
        case EncodingType.ASCII:
            char = charTable ? charTable[d0] : String.fromCharCode(d0)
            byteLen = 1
            break
        case EncodingType.UTF8: {
            // sequence length announced by the lead byte; 0 means the byte cannot start a sequence
            const sequenceLength = (d0 & 0b10000000) === 0 ? 1
                : (d0 & 0b11100000) === 0b11000000 ? 2
                    : (d0 & 0b11110000) === 0b11100000 ? 3
                        : (d0 & 0b11111000) === 0b11110000 ? 4
                            : 0
            let wellFormed = sequenceLength > 0

            // payload bits of the lead byte: 5, 4 or 3 low bits for 2-, 3- and 4-byte sequences
            code = sequenceLength === 1 ? d0 : d0 & (0b01111111 >> sequenceLength)
            for (let i = 1; wellFormed && i < sequenceLength; i++) {
                const continuation = data[pos + i]
                // a continuation byte must look like 10xxxxxx (a byte past the end of data fails this test too)
                wellFormed = (continuation & 0b11000000) === 0b10000000
                if (wellFormed) {
                    code = (code << 6) | (continuation & 0b00111111)
                }
            }

            if (wellFormed) {
                byteLen = sequenceLength
            } else {
                // resynchronise on the very next byte
                char = invalidChar
                byteLen = 1
            }
            break
        }
        case EncodingType.UTF16_BE:
            code = d0 * 0x100 + data[pos + 1]
            byteLen = 2
            break
        case EncodingType.UTF16_LE:
            code = d0 + data[pos + 1] * 0x100
            byteLen = 2
            break
        case EncodingType.UTF32_BE:
            code = d0 * 0x1000000 + data[pos + 1] * 0x10000 + data[pos + 2] * 0x100 + data[pos + 3]
            byteLen = 4
            break
        case EncodingType.UTF32_LE:
            code = d0 + data[pos + 1] * 0x100 + data[pos + 2] * 0x10000 + data[pos + 3] * 0x1000000
            byteLen = 4
            break
    }

    if (!char) {
        try {
            char = String.fromCodePoint(code)
        } catch {
            // String.fromCodePoint throws a RangeError for anything that is not a valid code point
            // (including NaN produced by reading past the end of data)
            char = invalidChar
        }
    }

    return {char: char, nextPos: pos + byteLen}
}

/** Index of a row, as counted by `extractRows`. */
export type RowIndex = number
/** Byte offset in the data at which a row starts. */
export type RowStartPosition = number
/** Cache of row start positions; `extractRows` reads it to skip ahead and fills it while parsing. */
export type RowPosCache = Map<RowIndex, RowStartPosition>
/** Only rows whose index is a multiple of this step get their start position cached. */
export const rowPosCacheStep = 100

/**
 * Parses delimited text into rows of trimmed string values, decoding the data one character at a time.
 *
 * Lines that yield no values are skipped and not counted as rows.
 *
 * @param data bytes to parse (without BOM)
 * @param params encoding, line separator (possibly `UniversalEOL`), value separator, text qualifier
 *     and escape character to parse with
 * @param startAt index of the first row to extract and, optionally, a cache of row start positions;
 *     the cache is used to jump to the closest cached row at or before `index` and is updated with the
 *     start position of every row whose index is a multiple of `rowPosCacheStep`
 * @param numberOfRowsToExtract maximum number of rows to extract; `undefined` means no limit
 * @param maxValueLength once a value holds this many characters, further ordinary characters are
 *     dropped (escaped qualifier/escape characters are still appended); `undefined` means no limit
 * @param stopOnDubiousRow stop parsing right after a row that is judged dubious
 * @param rowProcessor if given, it is called with every extracted row and no rows are returned
 * @returns `rows` — the extracted rows (empty when `rowProcessor` is given); `dubious` — the verdict
 *     for the last row handled: true when it held a single value or its number of values differed
 *     from that of the last stored row; `rowCount` — the number of rows extracted
 * @throws Error if more than 1000 rows have been collected to be returned
 */
export function extractRows(
    data: DataWithoutBOM,
    params: ParsingParams,
    startAt: { index: number, rowPosCache?: RowPosCache } | undefined,
    numberOfRowsToExtract: number | undefined,
    maxValueLength: number | undefined,
    stopOnDubiousRow = false,
    rowProcessor?: (row: string[]) => void
): { rows: string[][], dubious: boolean, rowCount: number } {
    let rowCount = 0
    const rows: string[][] = [],
        // finishes the current value: joins its characters, trims the result and appends it to the row
        addValue = (values: string[], chars: string[]) => values.push(chars.join('').trim()),
        startRow = startAt?.index ?? 0,
        // remembers where the row with the given index starts, but only for every rowPosCacheStep-th row
        // and only if a cache was supplied (a position of 0 or undefined is never stored)
        savePositionInCache = (rowIndex: number, position: number | undefined) => {
            if (position && startAt && startAt?.rowPosCache && rowIndex % rowPosCacheStep === 0) {
                startAt.rowPosCache.set(rowIndex, position)
            }
        },
        // finishes the current row and returns true if the row looks dubious;
        // `row` (the running row index) is advanced for every non-empty row, even for rows before startRow
        addRow = (values: string[], nextPosition: number | undefined): boolean => {
            if (values.length && row++ >= startRow && (numberOfRowsToExtract === undefined || rowCount < numberOfRowsToExtract)) {
                rowCount += 1
                const unusualNumberOfColumns = rows.length > 0 && rows[rows.length - 1].length !== values.length
                // with a rowProcessor only the first row is stored — it serves as the column-count reference
                if (rowProcessor === undefined || rows.length === 0) {
                    rows.push(values)
                }
                if (rowProcessor) {
                    rowProcessor(values)
                }
                savePositionInCache(row, nextPosition)
                return values.length === 1 || unusualNumberOfColumns
            } else {
                savePositionInCache(row, nextPosition)
                return false
            }
        },
        encodingType = encodingTypeFromEncoding(params.encoding),
        // byte-to-character table for the non-Unicode encodings, built by decoding bytes 0..255 in one go
        // NOTE(review): this presumably yields one character per byte only for single-byte encodings;
        // multi-byte ones (gbk, big5, shift-jis, euc-kr, ...) are also mapped to EncodingType.ASCII — confirm
        charTable = encodingType === EncodingType.ASCII ? Iconv.decode(Buffer.from([...U.range(256)]), params.encoding).split('') : undefined

    // parser state: byte position, whether we are inside a quoted value / right after an escape character,
    // characters of the value being read, whether the next character starts a new value, the previous
    // character, values of the row being read and the running row index
    let pos = 0, dubious = false, inQuotes = false, inEscape = false, valueChars: string[] = [], newValue = true,
        prevChar = '', values: string[] = [], row = 0

    // jump to the last cached row that is not beyond the requested start row
    // (relies on the cache keys being iterated in ascending order)
    if (startAt && startAt.index > 0 && startAt.rowPosCache) {
        let lastCachedIndex = 0
        for (const cachedIndex of startAt.rowPosCache.keys()) {
            if (cachedIndex > startAt.index) {
                break
            }
            lastCachedIndex = cachedIndex
        }
        if (lastCachedIndex > 0) {
            pos = U.get(startAt.rowPosCache, lastCachedIndex)
            row = lastCachedIndex
        }
    }

    while ((!stopOnDubiousRow || !dubious) && pos < data.byteLength && (numberOfRowsToExtract === undefined || rowCount < numberOfRowsToExtract)) {
        const {char, nextPos} = getChar(data, pos, encodingType, charTable)

        // when the escape character is the text qualifier itself, a qualifier that is not followed by
        // another qualifier was the closing quote rather than an escape
        if (inEscape && params.escapeChar === params.textQualifier && prevChar === params.textQualifier && char !== params.textQualifier) {
            inEscape = false
            inQuotes = false
        }

        if (newValue) {
            // with UniversalEOL, ignore a '\n' that follows a '\r' (second half of CRLF)
            // or that is the first character read in this call (prevChar is still '')
            if (params.lineSeparator !== UniversalEOL || char !== '\n' || (prevChar !== '\r' && prevChar !== '')) {
                const rowSeparator = params.lineSeparator === UniversalEOL
                    ? (char === '\r' ? '\r' : '\n')
                    : params.lineSeparator
                switch (char) {
                    case rowSeparator:
                        // the line ended right after a value separator: the last value is empty
                        if (values.length > 0) {
                            values.push('')
                        }
                        dubious = addRow(values, nextPos)
                        values = []
                        break
                    case params.valueSeparator:
                        // two separators in a row (or a separator at line start): empty value
                        values.push('')
                        break
                    case params.textQualifier:
                        inQuotes = true
                        newValue = false
                        valueChars = []
                        break
                    // whitespace before a value is skipped (a tab used as the value separator is matched above)
                    case '\t':
                    case ' ':
                        break
                    default:
                        inQuotes = false
                        newValue = false
                        valueChars = [char]
                        break
                }
            }
        } else if (inQuotes) {
            switch (char) {
                case params.escapeChar:
                    // an escaped escape character is kept; otherwise wait for the next character
                    if (inEscape) {
                        valueChars.push(char)
                        inEscape = false
                    } else {
                        inEscape = true
                    }
                    break
                case params.textQualifier:
                    // an escaped qualifier is kept; an unescaped one closes the quoted part
                    if (inEscape) {
                        valueChars.push(char)
                        inEscape = false
                    } else {
                        inQuotes = false
                    }
                    break
                default:
                    // NOTE(review): inEscape is not reset here, so after an escape character followed by an
                    // ordinary character the next qualifier would still be treated as escaped — confirm intended
                    if (maxValueLength === undefined || valueChars.length < maxValueLength) {
                        valueChars.push(char)
                    }
                    break
            }
        } else if (char === params.lineSeparator || (params.lineSeparator === UniversalEOL && (char === '\r' || char === '\n'))) {
            // end of line outside quotes; with UniversalEOL the '\n' of a CRLF pair does not end a second row
            if (char === params.lineSeparator || (params.lineSeparator === UniversalEOL && (prevChar !== '\r' || char !== '\n'))) {
                addValue(values, valueChars)
                newValue = true
                valueChars = []
                dubious = addRow(values, nextPos)
                values = []
            }
        } else if (char === params.valueSeparator) {
            addValue(values, valueChars)
            newValue = true
            valueChars = []
        } else if (maxValueLength === undefined || valueChars.length < maxValueLength) {
            valueChars.push(char)
        }
        pos = nextPos
        prevChar = char
    }

    // flush the value and the row that were in progress when parsing stopped
    // (e.g. the data does not end with a line separator)
    if (valueChars.length || (newValue && values.length > 0)) {
        addValue(values, valueChars)
    }
    if (values.length) {
        dubious = addRow(values, undefined)
    }

    if (rows.length > 1_000) {
        throw new Error("Too many rows to be returned")
    }

    return {rows: rowProcessor ? [] : rows, dubious, rowCount}
}

/**
 * Builds the column titles from the header rows of the data.
 *
 * The number of header rows is given by `actualHeaderRows(params)`; header values are read with a
 * limit of 30 characters each. For every column of the first header row, the cells of all header
 * rows in that column are joined with a space.
 *
 * @param data raw file content, possibly starting with a BOM
 * @param params import parameters
 * @returns one title per column of the first header row; an empty array if there are no header rows
 */
export function extractHeaderRow(data: DataWithBOM, params: ImportParams): string[] {
    const dataWithoutBOM = params.bomByteCount ? data.subarray(params.bomByteCount) : data
    const {rows: headerLines} = extractRows(dataWithoutBOM, params, undefined, actualHeaderRows(params), 30)

    if (headerLines.length === 0) {
        return []
    }

    return headerLines[0].map((_, colIndex) =>
        headerLines.map(headerLine => headerLine[colIndex]).join(' ')
    )
}

/**
 * Returns the default (localised) title of a column.
 *
 * @param i zero-based index of the column
 * @param columnTypes types of all columns, if known; skipped columns get an empty title and are
 *     left out of the numbering of the columns that follow them
 * @returns the title built from the `parser.column` message with a one-based column number,
 *     or an empty string for a skipped column
 */
export function defaultColumnTitle(i: number, columnTypes?: SemanticType[]) {
    if (columnTypes === undefined) {
        return $t('parser.column', {n: i + 1})
    }
    if (columnTypes[i] === SemanticType.Skip) {
        return ''
    }

    // count the skipped columns located before the i-th one
    let skippedBefore = 0
    const end = Math.min(i, columnTypes.length)
    for (let index = 0; index < end; index++) {
        if (columnTypes[index] === SemanticType.Skip) {
            skippedBefore += 1
        }
    }

    return $t('parser.column', {n: i - skippedBefore + 1})
}
