/*
 * Decompiled with CFR 0.152.
 */
package org.zaproxy.zap.spider.parser;

import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.Source;
import net.htmlparser.jericho.StartTag;
import net.htmlparser.jericho.StartTagType;
import org.parosproxy.paros.network.HttpMessage;
import org.zaproxy.zap.spider.SpiderParam;
import org.zaproxy.zap.spider.parser.SpiderParser;

/**
 * Spider parser that extracts URLs from HTML responses.
 *
 * <p>URLs are collected from a fixed set of element attributes, from URL-like text inside a
 * handful of text elements, from {@code <meta>} elements, from DOCTYPE declarations and,
 * when enabled in the spider parameters, from HTML comments. Every candidate is handed to
 * {@code processURL}, inherited from {@link SpiderParser}.
 */
@Deprecated
public class SpiderHtmlParser
extends SpiderParser {
    /**
     * Extracts the URL following {@code url=} or {@code report-uri} in a {@code <meta>}
     * {@code content} value; group 1 is the URL without quotes or the trailing {@code ;}.
     */
    static final Pattern URL_PATTERN = Pattern.compile("(?:url\\s*=|report-uri)\\s*[\"']?([^;'\"]+)", Pattern.CASE_INSENSITIVE);
    /** Absolute or scheme-relative URLs in comments that yielded nothing when parsed as HTML. */
    private static final Pattern PLAIN_COMMENTS_URL_PATTERN = Pattern.compile("(?:http(?:s?):)?//[^\\x00-\\x1f\"'\\s<>#()\\[\\]{}]+", Pattern.CASE_INSENSITIVE);
    /** URL-like tokens (absolute, scheme-relative or starting with a slash) in rendered text. */
    private static final Pattern INLINE_CONTENT_URL_PATTERN = Pattern.compile("(?:http(?:s?)://|(?:\\s|\\B)//?)[^\\x00-\\x1f\"'\\s<>#()\\[\\]{}]+", Pattern.CASE_INSENSITIVE);
    /** Individual candidates inside a {@code srcset} value: tokens that contain a dot. */
    private static final Pattern SRCSET_PATTERN = Pattern.compile("[^\"'=\\s,]+\\.[^\\s,]+", Pattern.CASE_INSENSITIVE);
    /** Elements whose rendered text is scanned with {@link #INLINE_CONTENT_URL_PATTERN}. */
    private static final List<String> ELEMENTS_WITH_TEXT = Arrays.asList("p", "title", "h1", "h2", "h3", "h4", "h5", "h6", "li", "blockquote");
    private static final String IMPORT_TAG = "IMPORT";

    public SpiderHtmlParser(SpiderParam params) {
        super(params);
    }

    /**
     * Parses the HTML response of {@code message} and reports every URL found.
     *
     * @param message the message whose response is parsed
     * @param source the already parsed response, or {@code null} to parse the response body here
     * @param depth the depth passed through to {@code processURL}
     * @return always {@code false}
     */
    @Override
    public boolean parseResource(HttpMessage message, Source source, int depth) {
        if (source == null) {
            source = new Source(message.getResponseBody().toString());
        }
        String baseURL = message.getRequestHeader().getURI().toString();

        // The flag is local on purpose: it describes this document only and must not leak
        // into the parsing of later responses handled by the same parser instance.
        boolean baseTagSet = false;
        Element base = source.getFirstElement("base");
        if (base != null) {
            this.getLogger().debug("Base tag was found in HTML: {}", base.getDebugInfo());
            String href = base.getAttributeValue("href");
            if (href != null && !href.isEmpty()) {
                String canonicalBase = this.getCanonicalURL(href, baseURL);
                // NOTE(review): assumes getCanonicalURL can return null for an unusable href -
                // confirm against SpiderParser. Keeping the request URL in that case avoids
                // dereferencing a null base URL when text content is scanned.
                if (canonicalBase != null) {
                    baseURL = canonicalBase;
                    baseTagSet = true;
                }
            }
        }

        this.parseSource(message, source, depth, baseURL, baseTagSet);

        if (this.getSpiderParam().isParseComments()) {
            List<StartTag> comments = source.getAllStartTags(StartTagType.COMMENT);
            for (StartTag comment : comments) {
                Source commentSource = new Source(comment.getTagContent());
                if (this.parseSource(message, commentSource, depth, baseURL, baseTagSet)) {
                    continue;
                }
                // Nothing found when treating the comment as HTML: fall back to plain URLs.
                Matcher matcher = PLAIN_COMMENTS_URL_PATTERN.matcher(commentSource.toString());
                while (matcher.find()) {
                    this.processURL(message, depth, matcher.group(), baseURL);
                }
            }
        }

        // Double-quoted, space-delimited tokens of a DOCTYPE declaration are treated as URLs.
        List<StartTag> doctypes = source.getAllStartTags(StartTagType.DOCTYPE_DECLARATION);
        for (StartTag doctype : doctypes) {
            for (String token : doctype.getTagContent().toString().split(" ")) {
                if (token.startsWith("\"") && token.endsWith("\"")) {
                    this.processURL(message, depth, token.substring(1, token.length() - 1), baseURL);
                }
            }
        }
        return false;
    }

    /** Reports every candidate of a {@code srcset} attribute value as its own URL. */
    private void srcSetProcessor(HttpMessage message, int depth, String localURL, String baseURL) {
        Matcher results = SRCSET_PATTERN.matcher(localURL);
        while (results.find()) {
            String candidate = results.group();
            if (!candidate.isEmpty()) {
                this.processURL(message, depth, candidate, baseURL);
            }
        }
    }

    /**
     * Extracts URLs from the elements of {@code source}.
     *
     * @param baseTagSet whether {@code baseURL} was taken from a {@code <base>} element of the
     *     document being parsed
     * @return {@code true} if at least one inspected attribute was present or one URL was found
     *     in text or {@code <meta>} content
     */
    private boolean parseSource(HttpMessage message, Source source, int depth, String baseURL, boolean baseTagSet) {
        this.getLogger().debug("Parsing an HTML message...");
        boolean resourcesFound = false;

        resourcesFound |= this.processElements(message, source, depth, baseURL, "a", "href", "ping");
        resourcesFound |= this.processElements(message, source, depth, baseURL, "applet", "archive", "codebase", "src");
        resourcesFound |= this.processElements(message, source, depth, baseURL, "area", "href", "ping");
        resourcesFound |= this.processElements(message, source, depth, baseURL, "audio", "src");
        resourcesFound |= this.processElements(message, source, depth, baseURL, "embed", "src");
        resourcesFound |= this.processElements(message, source, depth, baseURL, "frame", "src");
        resourcesFound |= this.processElements(message, source, depth, baseURL, "iframe", "src");
        resourcesFound |= this.processElements(message, source, depth, baseURL, "input", "src");
        resourcesFound |= this.processElements(message, source, depth, baseURL, "isindex", "action");
        resourcesFound |= this.processElements(message, source, depth, baseURL, "link", "href");
        resourcesFound |= this.processElements(message, source, depth, baseURL, "object", "data", "codebase");
        resourcesFound |= this.processElements(message, source, depth, baseURL, "script", "src");
        resourcesFound |= this.processElements(message, source, depth, baseURL, "table", "background");
        resourcesFound |= this.processElements(message, source, depth, baseURL, "td", "background");

        // video: own src, then the src of each nested <source>, then the poster.
        for (Element video : source.getAllElements("video")) {
            resourcesFound |= this.processAttributeElement(message, depth, baseURL, video, "src");
            for (Element nestedSource : video.getAllElements("source")) {
                resourcesFound |= this.processAttributeElement(message, depth, baseURL, nestedSource, "src");
            }
            resourcesFound |= this.processAttributeElement(message, depth, baseURL, video, "poster");
        }

        // img: srcset may hold several candidates, so it gets a dedicated processor.
        for (Element img : source.getAllElements("img")) {
            resourcesFound |= this.processAttributeElement(message, depth, baseURL, img, "src");
            resourcesFound |= this.processAttributeElement(message, depth, baseURL, img, "longdesc");
            resourcesFound |= this.processAttributeElement(message, depth, baseURL, img, "lowsrc");
            resourcesFound |= this.processAttributeElement(message, depth, baseURL, img, "dynsrc");
            resourcesFound |= this.processAttributeElement(message, depth, baseURL, img, "srcset", this::srcSetProcessor);
        }

        resourcesFound |= this.processElements(message, source, depth, baseURL, IMPORT_TAG, "implementation");

        resourcesFound |= this.processTextElements(message, source, depth, baseURL, baseTagSet);

        for (Element meta : source.getAllElements("meta")) {
            resourcesFound |= this.processMetaElement(message, depth, baseURL, meta);
        }

        resourcesFound |= this.processElements(message, source, depth, baseURL, "html", "manifest");
        resourcesFound |= this.processElements(message, source, depth, baseURL, "body", "background");
        return resourcesFound;
    }

    /**
     * Processes the given attributes, in the given order, on every {@code tagName} element.
     *
     * @return {@code true} if any of the attributes was present on any of the elements
     */
    private boolean processElements(HttpMessage message, Source source, int depth, String baseURL, String tagName, String... attributeNames) {
        boolean found = false;
        for (Element element : source.getAllElements(tagName)) {
            for (String attributeName : attributeNames) {
                found |= this.processAttributeElement(message, depth, baseURL, element, attributeName);
            }
        }
        return found;
    }

    /**
     * Scans the rendered text of the {@link #ELEMENTS_WITH_TEXT} elements for URL-like tokens.
     *
     * <p>When the document declared a {@code <base>}, the base URL is given a trailing slash and
     * a single leading slash is stripped from each match before both are passed to
     * {@code processURL}.
     *
     * @return {@code true} if at least one token was found
     */
    private boolean processTextElements(HttpMessage message, Source source, int depth, String baseURL, boolean baseTagSet) {
        // Loop-invariant, so computed once instead of on every match.
        String textBaseUrl = baseTagSet && !baseURL.endsWith("/") ? baseURL + "/" : baseURL;
        boolean found = false;
        for (String tag : ELEMENTS_WITH_TEXT) {
            for (Element element : source.getAllElements(tag)) {
                String renderedText = element.getContent().getRenderer().setMaxLineLength(0).toString();
                Matcher matcher = INLINE_CONTENT_URL_PATTERN.matcher(renderedText);
                while (matcher.find()) {
                    String foundMatch = matcher.group().trim();
                    // Scheme-relative matches ("//host/...") keep their leading slashes.
                    if (baseTagSet && foundMatch.startsWith("/") && !foundMatch.startsWith("//")) {
                        foundMatch = foundMatch.substring(1);
                    }
                    this.processURL(message, depth, foundMatch, textBaseUrl);
                    found = true;
                }
            }
        }
        return found;
    }

    /**
     * Extracts a URL from a single {@code <meta>} element.
     *
     * <p>With both {@code http-equiv} and {@code content} present, only the {@code refresh},
     * {@code location} and {@code content-security-policy} directives are inspected, using
     * {@link #URL_PATTERN}. Otherwise the {@code content} of a {@code msapplication-config}
     * element is used as the URL unless it is empty or {@code none}.
     *
     * @return {@code true} if a URL was extracted
     */
    private boolean processMetaElement(HttpMessage message, int depth, String baseURL, Element meta) {
        String equiv = meta.getAttributeValue("http-equiv");
        String name = meta.getAttributeValue("name");
        String content = meta.getAttributeValue("content");

        if (equiv != null && content != null) {
            boolean urlDirective = equiv.equalsIgnoreCase("refresh")
                    || equiv.equalsIgnoreCase("location")
                    || equiv.equalsIgnoreCase("content-security-policy");
            if (!urlDirective) {
                return false;
            }
            Matcher matcher = URL_PATTERN.matcher(content);
            if (!matcher.find()) {
                return false;
            }
            this.processURL(message, depth, matcher.group(1), baseURL);
            return true;
        }

        if ("msapplication-config".equalsIgnoreCase(name)
                && content != null
                && !content.equals("")
                && !content.equalsIgnoreCase("none")) {
            this.processURL(message, depth, content, baseURL);
            return true;
        }
        return false;
    }

    /** Processes one attribute with the default handling (see the six-argument overload). */
    private boolean processAttributeElement(HttpMessage message, int depth, String baseURL, Element element, String attributeName) {
        return this.processAttributeElement(message, depth, baseURL, element, attributeName, null);
    }

    /**
     * Processes the value of {@code attributeName} on {@code element}, if the attribute exists.
     *
     * <p>A non-null {@code customUrlProcessor} receives the raw value. Otherwise the value is
     * one URL, except for {@code ping}, which is split on whitespace into several URLs.
     *
     * @return {@code true} if the attribute was present, {@code false} otherwise
     */
    private boolean processAttributeElement(HttpMessage message, int depth, String baseURL, Element element, String attributeName, CustomUrlProcessor customUrlProcessor) {
        String localURL = element.getAttributeValue(attributeName);
        if (localURL == null) {
            return false;
        }
        if (customUrlProcessor != null) {
            customUrlProcessor.process(message, depth, localURL, baseURL);
        } else if (attributeName.equalsIgnoreCase("ping")) {
            for (String pingURL : localURL.split("\\s")) {
                if (!pingURL.isEmpty()) {
                    this.processURL(message, depth, pingURL, baseURL);
                }
            }
        } else {
            this.processURL(message, depth, localURL, baseURL);
        }
        return true;
    }

    /**
     * Tells whether this parser handles the response.
     *
     * @return {@code true} if the resource was not already consumed and the response header
     *     reports HTML content
     */
    @Override
    public boolean canParseResource(HttpMessage message, String path, boolean wasAlreadyConsumed) {
        return !wasAlreadyConsumed && message.getResponseHeader().isHtml();
    }

    /** Strategy for attribute values that need more than a single {@code processURL} call. */
    @FunctionalInterface
    private static interface CustomUrlProcessor {
        public void process(HttpMessage message, int depth, String localURL, String baseURL);
    }
}

