import re
from email.utils import parsedate_to_datetime
from typing import TYPE_CHECKING, Sequence, Optional

from debputy.linting.lint_util import LintState, te_range_to_lsp
from debputy.lsp.lsp_features import (
    lsp_standard_handler,
    SecondaryLanguage,
    LanguageDispatchRule,
    lint_diagnostics,
    lsp_document_link,
)
from debputy.lsp.quickfixes import (
    propose_correct_text_quick_fix,
)
from debputy.lsp.spellchecking import spellcheck_line
from debputy.util import PKGVERSION_REGEX

try:
    from debputy.lsp.vendoring._deb822_repro.locatable import (
        Position as TEPosition,
        Range as TERange,
    )

    from pygls.server import LanguageServer
    from pygls.workspace import TextDocument
    from debputy.lsp.debputy_ls import DebputyLanguageServer
except ImportError:
    pass


if TYPE_CHECKING:
    import lsprotocol.types as types
else:
    import debputy.lsprotocol.types as types


# Maximum line width before the linter reports "Line exceeds N characters".
# Same as Lintian
_MAXIMUM_WIDTH: int = 82
# Matches the start of an entry header line. Group 1 is the leading run of
# non-whitespace characters (used as the source package name) and group 2 is
# the text inside the first pair of parentheses (used as the version).
_HEADER_LINE = re.compile(r"^(\S+)\s*[(]([^)]+)[)]")  # TODO: Add rest
# Dispatch rule shared by every handler registered in this file (the standard
# handlers, the lint diagnostics and the document links).
# NOTE(review): the meaning of each positional argument (including the `None`)
# is defined by `LanguageDispatchRule.new_rule` - confirm there before changing.
_DISPATCH_RULE = LanguageDispatchRule.new_rule(
    "debian/changelog",
    None,
    ("debian/changelog", "debian/changelog.dch"),
    [
        # emacs's name
        SecondaryLanguage("debian-changelog"),
        # vim's name
        SecondaryLanguage("debchangelog"),
        SecondaryLanguage("dch"),
    ],
)


# Abbreviated English day names, ordered so that the list can be indexed with
# the result of `datetime.weekday()` (Monday is 0, Sunday is 6).
_WEEKDAYS_BY_IDX = [
    "Mon",
    "Tue",
    "Wed",
    "Thu",
    "Fri",
    "Sat",
    "Sun",
]
# Same names as a set, for validating the day name of a sign-off date.
_KNOWN_WEEK_DAYS = frozenset(_WEEKDAYS_BY_IDX)
# Finds a whole bug reference clause: "closes:", "see" or "see:" followed by one
# or more comma separated bug numbers, each optionally prefixed with "bug"
# and/or "#". Case-insensitive; written in verbose mode, so literal spaces in
# the pattern are not significant.
_BUG_LINKS_FINDER_REGEX = re.compile(
    r"""
    (?:closes:|see:?) \s* (?:bug \s*)? \#?\s?\d+ (?:, \s*(?:bug)? \#?\s?\d+)*
    """,
    re.I | re.VERBOSE,
)
# Extracts the individual bug references from a clause matched by the regex
# above. Group 1 holds the digits of the bug number; the overall match also
# covers the optional "bug" / "#" prefix.
_INDIVIDUAL_BUGS_REGEX = re.compile(
    r"""(?:bug\s?)?(?:#\s?)?(\d+)""",
    re.I,
)


# Register the shared handlers for these two LSP requests for changelog files.
# NOTE(review): what the standard handlers do is defined in
# `debputy.lsp.lsp_features` - presumably generic code-action and
# on-save handling; confirm there.
lsp_standard_handler(_DISPATCH_RULE, types.TEXT_DOCUMENT_CODE_ACTION)
lsp_standard_handler(_DISPATCH_RULE, types.TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL)

# Passed as the fourth argument of `emit_diagnostic` for the sign-off line
# format diagnostics in this file.
# NOTE(review): presumably refers to the Debian Policy Manual section
# describing debian/changelog - confirm.
DPM_DCH_SECTION = "Policy 4.4"


def _check_footer_date(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
    start_date_idx: int,
) -> None:
    """Validate the date at the end of a changelog sign-off line.

    The checks run in order and stop at the first error:

    1. The date must be at least 6 characters long.
    2. It must start with a known three letter day name (Mon, ..., Sun).
    3. The day name must be followed by exactly ", ".
    4. The remainder must be parsable by `parsedate_to_datetime`.

    If all of these pass and the day name does not match the weekday of the
    parsed date, a warning with a quick fix for the day name is emitted.

    :param lint_state: Receiver of the diagnostics.
    :param line: The sign-off line being checked.
    :param line_no: Line number of `line`, used for the diagnostic ranges.
    :param line_len: Length of `line`.
    :param start_date_idx: Offset in `line` where the date starts.
    """
    # 3 characters for the day name (Mon), then a comma plus a space followed by the
    # actual date. The 6 characters limit is a gross under estimation of the real
    # size.
    if line_len < start_date_idx + 6:
        text_range = _single_line_subrange(line_no, start_date_idx, line_len)
        lint_state.emit_diagnostic(
            text_range,
            "Expected a date in RFC822 format (Tue, 12 Mar 2024 12:34:56 +0000)",
            "error",
            DPM_DCH_SECTION,
        )
        return
    day_name_range = _single_line_subrange(line_no, start_date_idx, start_date_idx + 3)
    day_name = line[start_date_idx : start_date_idx + 3]
    if day_name not in _KNOWN_WEEK_DAYS:
        lint_state.emit_diagnostic(
            day_name_range,
            "Expected a three letter date here using US English format (Mon, Tue, ..., Sun)",
            "error",
            DPM_DCH_SECTION,
        )
        return

    sep = line[start_date_idx + 3 : start_date_idx + 5]
    if sep != ", ":
        text_range = _single_line_subrange(
            line_no,
            start_date_idx + 3,
            start_date_idx + 5,
        )
        lint_state.emit_diagnostic(
            text_range,
            f'Improper formatting of date. Expected ", " here, not "{sep}"',
            "error",
            DPM_DCH_SECTION,
        )
        return

    # Everything after "Ddd, " is handed to the date parser.
    date_str = line[start_date_idx + 5 :]
    try:
        # FIXME: this parser is too forgiving (it ignores trailing garbage)
        date = parsedate_to_datetime(date_str)
    except ValueError as e:
        error_range = _single_line_subrange(line_no, start_date_idx + 5, line_len)
        # Format the exception itself rather than `e.args[0]`, so that an
        # exception without arguments cannot trigger an IndexError here.
        lint_state.emit_diagnostic(
            error_range,
            f"Unable to parse the date as a valid RFC822 date: {e}",
            "error",
            "debputy",
        )
        return
    weekday_idx = date.weekday()
    expected_week_day = _WEEKDAYS_BY_IDX[weekday_idx]
    if expected_week_day != day_name:
        # Appending "day" to the abbreviation only works for some days
        # ("Monday" but "Tueday", "Wedday", ...), so spell the names out.
        # The names are hard-coded to keep the message locale independent.
        full_day_name = (
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        )[weekday_idx]
        lint_state.emit_diagnostic(
            day_name_range,
            f"The date was a {full_day_name}",
            "warning",
            "debputy",
            quickfixes=[propose_correct_text_quick_fix(expected_week_day)],
        )


def _check_email(
    lint_state: LintState,
    line: str,
    line_no: int,
    line_len: int,
) -> tuple[int, int]:
    """Locate the `<email>` part of a sign-off line and flag missing delimiters.

    The search starts at offset 4 (the caller has verified that the line starts
    with " -- "). When either "<" or ">" cannot be found, exactly one error
    diagnostic is emitted and a best-effort guess of the missing offset is used.

    :param lint_state: Receiver of the diagnostics.
    :param line: The sign-off line being checked.
    :param line_no: Line number of `line`, used for the diagnostic range.
    :param line_len: Length of `line`.
    :return: A tuple of (offset of "<", offset of ">"). When the email is
      well-formed, both are the real offsets. The first value is -1 when neither
      delimiter was found. When ">" is missing, the second value is the end of
      the emitted diagnostic range instead.
    """
    # First "<" at or after offset 4; -1 if there is none.
    email_start_idx = _offset_of(line, "<", 4)
    # First ">" at or after the "<" (or after offset 4 if there was no "<").
    email_end_idx = _offset_of(line, ">", max(email_start_idx, 4))

    if not (3 < email_start_idx < email_end_idx):
        # Email invalid
        if email_start_idx >= 3:
            # The "<" was found, so the ">" must be the missing part (the ">"
            # search started at the "<" and therefore cannot precede it).
            msg = 'Missing closing ">" to finish email address before the sign off date'
            diag_start = email_start_idx
            # Mark from the "<" up to the next space (or the end of the line).
            diag_end = _offset_of(
                line,
                " ",
                email_start_idx,
                offset_if_missing=line_len,
            )
        else:
            if email_end_idx > -1:
                # Only the ">" was found. Guess that the email starts right
                # after the last space before the ">".
                diag_start = 1 + _roffset_of(
                    line,
                    " ",
                    4,
                    email_end_idx - 1,
                    offset_if_missing=4,
                )
                # NOTE(review): `diag_start` is at least 5 here (the search starts
                # at offset 4 and the fallback is 4, plus one), so this condition
                # looks like it is always true - confirm whether that is intended.
                if diag_start > 4:
                    email_start_idx = diag_start
                # For consistency, we always include the trailing `>`.
                diag_end = email_end_idx + 1
            else:
                # Neither delimiter was found. Mark everything after " -- " up to
                # the first double space (or the end of the line).
                diag_start = 4
                diag_end = _offset_of(
                    line,
                    "  ",
                    diag_start,
                    offset_if_missing=line_len,
                )
            msg = 'Missing opening "<" to start the email address after the name'
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, diag_start, diag_end),
            msg,
            "error",
            DPM_DCH_SECTION,
        )
        if email_end_idx < 0:
            # No ">" at all; report the end of the flagged text as the end offset.
            email_end_idx = diag_end
    return email_start_idx, email_end_idx


def _check_footer_line(
    lint_state: LintState,
    line: str,
    line_no: int,
) -> None:
    """Lint a changelog sign-off line (` -- Name <email>  date`).

    The following is checked, in order:

    * The line contains more than the bare " --" marker.
    * The line starts with " -- " (including the trailing space).
    * The email is delimited by "<" and ">" (see `_check_email`).
    * There is a name before the email and it is followed by one space.
    * There is a date after the email, separated by exactly two spaces.
    * The date itself is valid (see `_check_footer_date`).

    :param lint_state: Receiver of the diagnostics.
    :param line: The line to check. It must start with " --".
    :param line_no: Line number of `line`, used for the diagnostic ranges.
    """
    if line.rstrip() == " --":
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, 3),
            'Missing "Name <email@example.com>"',
            "error",
            DPM_DCH_SECTION,
            # TODO: Add quick fix to insert all the relevant data.
        )
        return
    line_len = len(line)
    if not line.startswith(" -- "):
        # Pre-condition for this function being called.
        assert line.startswith(" --") and line_len > 3
        # Only flag the " --" marker itself. The quick fix provides the
        # replacement text for the flagged range, so flagging the whole line
        # would make the fix discard the name, email and date.
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, 3),
            'Start of sign-off line should be " -- ".',
            "error",
            DPM_DCH_SECTION,
            quickfixes=[propose_correct_text_quick_fix(" -- ")],
        )
        return

    email_start_idx, email_end_idx = _check_email(
        lint_state,
        line,
        line_no,
        line_len,
    )

    start_of_email_identified = email_start_idx > 3
    end_of_email_identified = 4 < email_end_idx < line_len
    if not start_of_email_identified:
        # Without a start of the email we cannot tell the name apart from the
        # email. `_check_email` has already emitted a diagnostic for this.
        return

    # Email valid, name might be missing
    name_start = 4
    name_end = email_start_idx
    name = line[name_start:name_end]
    if not name or name.isspace():
        # The range must always be at least one character width. There is no good direction
        # to expand it in. Picked left because we know there is a space character,
        if name_end - name_start < 1:
            name_start -= 1
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Missing name before email",
            "error",
            DPM_DCH_SECTION,
        )
    elif not name.endswith(" ") or len(name.strip()) != len(name) - 1:
        # The name must have no leading whitespace and exactly one trailing space.
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, name_start, name_end),
            "Non-standard spacing around the name",
            "warning",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    name.strip() + " ",
                    proposed_title="Fix spacing",
                )
            ],
        )
    if not end_of_email_identified:
        # If we are unsure where the email is, we currently do not have the wherewithal to identify
        # where the date is. Technically, there are cases where we could identify the date and work
        # back from there. Not written because I thought it is too much effort for the value at the
        # time I put in this comment.
        #
        # Note this will already have triggered a diagnostic.
        return
    post_email = line[email_end_idx + 1 :]
    if not post_email or post_email.isspace():
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, 0, line_len),
            "Missing sign off date",
            "error",
            DPM_DCH_SECTION,
        )
        return
    # The date starts at the first non-whitespace character after the email.
    start_date_idx = email_end_idx + (len(post_email) - len(post_email.lstrip())) + 1
    space_len = start_date_idx - email_end_idx - 1
    if line[email_end_idx + 1 : start_date_idx] != "  ":
        correction = "  "
        diag_start = email_end_idx + 1
        diag_end = start_date_idx
        if not space_len:
            # If there is no spaces, then we mark the closing `>` and the following character instead if possible.
            #
            # Note the choice here of including both boundary characters is because some editors refuse to include
            # the lone `>` in a codeAction range with the user explicitly doing a selection range, so our range
            # detection will miss it. By including the following character, we ensure there is always a two
            # character range to place the cursor in the middle of and the editors tend to respect that as a valid
            # range (also, single character ranges are harder for the user to see).
            diag_start = email_end_idx
            diag_end = min(start_date_idx + 1, line_len)
            if start_date_idx < line_len:
                end_char = line[start_date_idx]
            else:
                end_char = ""
            correction = f">  {end_char}"

        lint_state.emit_diagnostic(
            _single_line_subrange(
                line_no,
                diag_start,
                diag_end,
            ),
            "Must be exactly two spaces between email and sign off date",
            "error",
            DPM_DCH_SECTION,
            quickfixes=[
                propose_correct_text_quick_fix(
                    correction,
                    proposed_title="Fix spacing between email and date",
                ),
            ],
        )
    _check_footer_date(lint_state, line, line_no, line_len, start_date_idx)

def _offset_of(
    text: str,
    ch: str,
    /,
    start: int | None = None,
    end: int | None = None,
    *,
    offset_if_missing: int = -1,
) -> int:
    try:
        return text.index(ch, start, end)
    except ValueError:
        return offset_if_missing


def _roffset_of(
    text: str,
    ch: str,
    /,
    start: int | None = None,
    end: int | None = None,
    *,
    offset_if_missing: int = -1,
) -> int:
    try:
        return text.rindex(ch, start, end)
    except ValueError:
        return offset_if_missing


def _single_line_subrange(
    line_no: int,
    character_start_pos: int,
    character_end_pos: int,
) -> "TERange":
    """Create a range that starts and ends on the same line.

    :param line_no: The line that both ends of the range are on.
    :param character_start_pos: Character offset of the start of the range.
    :param character_end_pos: Character offset of the end of the range.
    :return: A `TERange` from (line_no, character_start_pos) to
      (line_no, character_end_pos).
    """
    range_start = TEPosition(line_no, character_start_pos)
    range_end = TEPosition(line_no, character_end_pos)
    return TERange(range_start, range_end)


def _check_header_line(
    lint_state: LintState,
    line: str,
    line_no: int,
    entry_no: int,
) -> None:
    """Lint the header line of a changelog entry (`name (version) ...`).

    For the first entry, the source name is compared with the "Source" field of
    the source package known to `lint_state` (when there is one). For every
    entry, the version is checked against `PKGVERSION_REGEX`; a valid version is
    additionally checked for the "dsfg" typo.

    Lines that do not match `_HEADER_LINE` are silently ignored.

    :param lint_state: Receiver of the diagnostics and provider of the source
      package.
    :param line: The header line to check.
    :param line_no: Line number of `line`, used for the diagnostic ranges.
    :param entry_no: 1-based number of the entry that the header belongs to.
    """
    header_match = _HEADER_LINE.search(line)
    if header_match is None:
        # Syntax error: TODO flag later
        return
    source_name = header_match.group(1)
    source_version = header_match.group(2)
    version_start, version_end = header_match.span(2)

    control_source = lint_state.source_package
    if entry_no == 1 and control_source is not None:
        expected_name = control_source.fields.get("Source")
        if expected_name != source_name:
            name_start, name_end = header_match.span(1)
            msg = "The first entry must use the same source name as debian/control."
            if expected_name is None:
                msg += ' The d/control file is missing the "Source" field in its first stanza'
            else:
                msg += f' Changelog uses: "{source_name}" while d/control uses: "{expected_name}"'
            lint_state.emit_diagnostic(
                _single_line_subrange(line_no, name_start, name_end),
                msg,
                "error",
                "dpkg",  # man:deb-src-control(5) / #1089794
            )

    if PKGVERSION_REGEX.fullmatch(source_version) is not None:
        # The version is valid; only the common "dsfg" typo is left to check.
        typo_index = source_version.find("dsfg")
        if typo_index >= 0:
            typo_start = version_start + typo_index
            lint_state.emit_diagnostic(
                _single_line_subrange(line_no, typo_start, typo_start + 4),
                'Typo of "dfsg" (Debian Free Software Guidelines)',
                "pedantic",
                "debputy",
                quickfixes=[propose_correct_text_quick_fix("dfsg")],
            )
        return

    valid_part = PKGVERSION_REGEX.search(source_version)
    if valid_part is None:
        # Nothing in the text resembles a version; flag all of it.
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, version_start, version_end),
            f'Cannot parse "{source_version}" as a Debian version.',
            "error",
            "Policy 5.6.12",
        )
        return

    # A part of the text is a valid version. Flag whatever surrounds that part.
    valid_start, valid_end = valid_part.span(0)
    invalid_spans = []
    if valid_start > 0:
        invalid_spans.append((version_start, version_start + valid_start))
    if valid_end < len(source_version):
        invalid_spans.append((version_start + valid_end, version_end))
    for span_start, span_end in invalid_spans:
        lint_state.emit_diagnostic(
            _single_line_subrange(line_no, span_start, span_end),
            "This part cannot be parsed as a valid Debian version",
            "error",
            "Policy 5.6.12",
        )


@lint_diagnostics(_DISPATCH_RULE)
async def _lint_debian_changelog(lint_state: LintState) -> None:
    """Lint the most recent entries of a debian/changelog file.

    Each line is classified by its leading whitespace:

    * Lines starting with " --" are checked as sign-off lines.
    * Lines starting with a non-whitespace character are entry headers. Linting
      stops when the header of the third entry is reached.
    * Lines starting with two spaces are change details. They are checked
      against the maximum line width and spell checked, except for lines of the
      form "  [ ... ]".
    * Blank lines and any other lines are skipped.

    :param lint_state: Provider of the lines and receiver of the diagnostics.
    """
    lines = lint_state.lines
    entry_no = 0
    entry_limit = 2
    # Budget for the spell checker; reduced by the number that
    # `spellcheck_line` returns for each checked line.
    max_words = 1000
    max_line_length = _MAXIMUM_WIDTH
    for line_no, orig_line in enumerate(lines):
        line = orig_line.rstrip()
        if not line:
            continue
        if line.startswith(" --"):
            _check_footer_line(lint_state, line, line_no)
            continue
        if not line.startswith("  "):
            if not line[0].isspace():
                entry_no += 1
                # Figure out the right cut which may not be as simple as just the
                # top two.
                if entry_no > entry_limit:
                    break
                _check_header_line(
                    lint_state,
                    line,
                    line_no,
                    entry_no,
                )
            continue
        # Exclude only the line terminator from the width (trailing whitespace
        # still counts). Stripping it explicitly, rather than subtracting one,
        # keeps the result correct for a final line without a newline and for
        # "\r\n" line endings.
        orig_line_len = len(orig_line.rstrip("\r\n"))
        if orig_line_len > max_line_length:
            exceeded_line_range = _single_line_subrange(
                line_no,
                max_line_length,
                orig_line_len,
            )
            lint_state.emit_diagnostic(
                exceeded_line_range,
                f"Line exceeds {max_line_length} characters",
                "pedantic",
                "debputy",
            )
        if len(line) > 3 and line[2] == "[" and line[-1] == "]":
            # Do not spell check [ X ] as X is usually a name
            continue
        if max_words > 0:
            new_diagnostics = spellcheck_line(lint_state, line_no, line)
            max_words -= new_diagnostics


@lsp_document_link(_DISPATCH_RULE)
def _debian_changelog_links(
    ls: "DebputyLanguageServer",
    params: types.DocumentLinkParams,
) -> Optional[Sequence[types.DocumentLink]]:
    """Provide document links for bug references in the changelog.

    Only lines starting with two spaces are considered, and only the first bug
    reference clause ("Closes: #123, #456" and similar) on each of them. Every
    bug number in the clause becomes a link to `https://bugs.debian.org/<bug>`.

    :param ls: The language server, used for looking up the document.
    :param params: The request parameters identifying the document.
    :return: The links found, in document order. At most 100 links are returned.
    """
    doc = ls.workspace.get_text_document(params.text_document.uri)
    lines = doc.lines
    links = []
    max_links = 100

    for line_no, line in enumerate(lines):
        if not line.startswith("  "):
            continue
        bug_line_match = _BUG_LINKS_FINDER_REGEX.search(line)
        if not bug_line_match:
            continue
        # The individual bugs are matched against the clause only, so their
        # offsets must be shifted by the position of the clause in the line.
        bug_offset = bug_line_match.start(0)
        for bug_match in _INDIVIDUAL_BUGS_REGEX.finditer(bug_line_match.group(0)):
            bug_id = bug_match.group(1)
            bug_span = bug_match.span()
            bug_range = _single_line_subrange(
                line_no,
                bug_span[0] + bug_offset,
                bug_span[1] + bug_offset,
            )
            bug_range_client_units = doc.position_codec.range_to_client_units(
                lines,
                te_range_to_lsp(bug_range),
            )
            links.append(
                types.DocumentLink(
                    bug_range_client_units, f"https://bugs.debian.org/{bug_id}"
                )
            )

            # Stop scanning the document entirely once the cap is reached; a
            # `break` here would only leave the loop over the current line.
            if len(links) >= max_links:
                return links

    return links
