package pdf2xml;

import java.awt.Button;
import java.awt.Component;
import java.awt.Dialog;
import java.awt.Frame;
import java.awt.GraphicsEnvironment;
import java.awt.Label;
import java.awt.LayoutManager;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.List;
import java.util.ListIterator;
import java.util.Vector;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;

/* loaded from: input_file:pdf2xml/first_classification.class */
public class first_classification {
    // Set by the constructor and forwarded unchanged to second_classification.
    boolean interactive_extraction;
    // Set by the constructor; forwarded to second_classification and used as the
    // directory prefix for "debugged_output.xml".
    String path;
    // Not read or written by any code in this class.
    PrintStream dos;
    // The "text" children of the page currently being processed by run().
    List current_text_elements;
    // File name most recently passed to run(); re-read by debug_pdftohtml_output().
    String pdftohtml_file_name = "";
    // Not read or written by any code in this class.
    int distance_sum = 0;
    // One Font per "fontspec" element, accumulated over all pages; looked up by the
    // "font" attribute of a text element used as an index.
    Vector fonts = new Vector();
    // All Line objects found so far, appended page by page.
    Vector lines = new Vector();
    // Never filled in this class; only cloned into second_classification.
    Vector font_counter = new Vector();
    // Multiline_Block objects detected by run() and merged by multiline_block_merge().
    Vector mlbs = new Vector();
    // Not read or written by any code in this class.
    boolean modus = false;
    // Number of equally wide text columns each page is divided into.
    int page_text_columns_count = 1;
    // Text_Column objects of the page currently being processed; cleared for every page.
    Vector text_columns = new Vector();
    // Counters incremented by line_merge() for every line it removes before / after a block.
    int removed_elements_before = 0;
    int removed_elements_after = 0;

    /**
     * Creates the first classification pass.
     *
     * @param z   stored as {@code interactive_extraction} and later handed to the second pass
     * @param str stored as {@code path}; used as the directory prefix for files this class writes
     */
    public first_classification(boolean z, String str) {
        path = str;
        interactive_extraction = z;
    }

    /**
     * Parses the XML file produced by pdftohtml and runs the first classification pass.
     *
     * <p>For every {@code page} element the method
     * <ol>
     *   <li>registers the page's {@code fontspec} entries in {@link #fonts},</li>
     *   <li>sorts the {@code text} elements with {@code top_comparator} and groups them into
     *       {@code Line} objects per text column,</li>
     *   <li>sorts every line with {@code left_comparator} and fuses neighbouring elements that
     *       {@link #belonging_together} reports as belonging together,</li>
     *   <li>opens a {@code Multiline_Block} at the first line holding more than one element and
     *       closes it at the first single-element line that cannot be merged into the line
     *       above it.</li>
     * </ol>
     * Afterwards the blocks are merged with {@link #multiline_block_merge()} and the collected
     * data is handed to {@code second_classification}.
     *
     * <p>Errors are reported on standard output; a {@code JDOMException} additionally opens a
     * modal dialog that offers to retry on a pre-debugged copy of the file.
     *
     * @param str name of the pdftohtml XML output to read
     */
    public void run(String str) {
        this.pdftohtml_file_name = str;
        try {
            ListIterator pages = new SAXBuilder().build(str).getRootElement().getChildren("page").listIterator();
            // Index in this.lines of the first line that belongs to the current page.
            int firstLineOfPage = 0;
            while (pages.hasNext()) {
                this.text_columns.clear();
                Element page = (Element) pages.next();
                int pageNumber = Integer.parseInt(page.getAttribute("number").getValue());
                // The value is not used; parsing it rejects pages without a numeric height.
                Integer.parseInt(page.getAttribute("height").getValue());
                int columnWidth = Integer.parseInt(page.getAttribute("width").getValue()) / this.page_text_columns_count;
                for (int c = 0; c < this.page_text_columns_count; c++) {
                    this.text_columns.add(new Text_Column(columnWidth));
                }

                ListIterator fontSpecs = page.getChildren("fontspec").listIterator();
                while (fontSpecs.hasNext()) {
                    Element fontSpec = (Element) fontSpecs.next();
                    this.fonts.addElement(new Font(
                            pageNumber,
                            Integer.parseInt(fontSpec.getAttribute("id").getValue()),
                            Integer.parseInt(fontSpec.getAttribute("size").getValue()),
                            fontSpec.getAttribute("family").getValue(),
                            fontSpec.getAttribute("color").getValue()));
                }

                // Group the text elements, ordered by top_comparator, into lines per column.
                this.current_text_elements = page.getChildren("text");
                Element[] sortedTexts = new Element[this.current_text_elements.size()];
                this.current_text_elements.toArray(sortedTexts);
                Arrays.sort(sortedTexts, new top_comparator());
                for (Element rawText : sortedTexts) {
                    Text_Element text = getTextElement(rawText);
                    int columnIndex = Math.abs(text.left / columnWidth);
                    if (columnIndex >= this.text_columns.size()) {
                        // Elements starting right of the last column are dropped.
                        continue;
                    }
                    Text_Column column = (Text_Column) this.text_columns.elementAt(columnIndex);
                    Line lastLine = column.lines.size() > 0 ? (Line) column.lines.lastElement() : null;
                    if (lastLine != null && in_the_line(text, lastLine)) {
                        lastLine.texts.addElement(text);
                        actualize_line_values(text, lastLine);
                    } else {
                        Line newLine = new Line();
                        newLine.texts.addElement(text);
                        set_new_line_values(text, newLine);
                        column.lines.addElement(newLine);
                    }
                }
                for (int c = 0; c < this.text_columns.size(); c++) {
                    this.lines.addAll(((Text_Column) this.text_columns.elementAt(c)).lines);
                }

                boolean inBlock = false;
                // NOTE(review): nothing is ever added to this sum in the available code, so
                // avg_distance of a closed block is always 0.
                int distanceSum = 0;
                int lineIndex = firstLineOfPage;
                while (lineIndex < this.lines.size()) {
                    Line line = (Line) this.lines.elementAt(lineIndex);

                    // Order the elements of the line with left_comparator.
                    Text_Element[] byLeft = new Text_Element[line.texts.size()];
                    line.texts.toArray(byLeft);
                    Arrays.sort(byLeft, new left_comparator());
                    line.texts.clear();
                    line.texts.addAll(Arrays.asList(byLeft));

                    // Fuse neighbours that belong together; the index only advances when the
                    // pair was left untouched, so a grown element is compared with its new
                    // neighbour as well.
                    int t = 0;
                    while (t < line.texts.size() - 1) {
                        Text_Element current = (Text_Element) line.texts.elementAt(t);
                        Text_Element next = (Text_Element) line.texts.elementAt(t + 1);
                        int relation = belonging_together(current, next);
                        if (relation == -1) {
                            t++;
                            continue;
                        }
                        line.texts.removeElementAt(t + 1);
                        if (relation == 1) {
                            // Horizontally overlapping: collect the parts and join them later.
                            if (current.elements.size() == 0) {
                                current.elements.add(current);
                                actualize_text_values(current, next);
                            }
                            if (next.value.length() > 0) {
                                current.elements.add(next);
                                actualize_text_values(current, next);
                            }
                        } else if (relation == 0) {
                            // Close neighbours: concatenate right away.
                            current.value = current.value + " " + next.value;
                            actualize_text_values(current, next);
                        }
                    }

                    // Join collected parts, ordered by top_comparator_for_texts.
                    for (int k = 0; k < line.texts.size(); k++) {
                        Text_Element cell = (Text_Element) line.texts.elementAt(k);
                        if (cell.elements.size() > 0) {
                            Text_Element[] parts = new Text_Element[cell.elements.size()];
                            cell.elements.toArray(parts);
                            Arrays.sort(parts, new top_comparator_for_texts());
                            StringBuffer joined = new StringBuffer();
                            for (Text_Element part : parts) {
                                joined.append(part.value).append(" ");
                            }
                            cell.value = joined.toString();
                            cell.elements.clear();
                        }
                    }

                    if (line.texts.size() > 1) {
                        if (inBlock) {
                            actualize_mlb_values((Multiline_Block) this.mlbs.lastElement(), line);
                        } else {
                            Multiline_Block block = new Multiline_Block();
                            distanceSum = 0;
                            set_mlb_values(block, line, lineIndex, pageNumber);
                            this.mlbs.add(block);
                            inBlock = true;
                        }
                    } else if (line.texts.size() == 1 && inBlock) {
                        // A single element directly below a block line is either the wrapped
                        // continuation of a cell above it or the end of the block.
                        Line previous = (Line) this.lines.elementAt(lineIndex - 1);
                        Text_Element single = (Text_Element) line.texts.elementAt(0);
                        int gap = line.first_top - previous.bottom;
                        boolean merged = false;
                        for (int p = 0; p < previous.texts.size(); p++) {
                            Text_Element above = (Text_Element) previous.texts.elementAt(p);
                            int leftDiff = Math.abs(above.left - single.left);
                            int rightDiff = Math.abs((above.left + above.width) - (single.left + single.width));
                            if (gap < single.height / 2
                                    && above.typ.equals(single.typ)
                                    && above.typ.equals("text")
                                    && (leftDiff < 3 || rightDiff < 3)) {
                                above.value = above.value + "\n" + single.value;
                                above.count_lines++;
                                this.lines.removeElementAt(lineIndex);
                                lineIndex--;
                                actualize_text_values(above, single);
                                actualize_line_values(single, previous);
                                merged = true;
                                // The line has been consumed. Continuing would remove a
                                // second, unrelated line on another match.
                                break;
                            }
                        }
                        if (!merged) {
                            Multiline_Block block = (Multiline_Block) this.mlbs.lastElement();
                            int span = block.end - block.begin;
                            block.avg_distance = span > 0 ? distanceSum / span : 0;
                            inBlock = false;
                        }
                    }
                    lineIndex++;
                }
                firstLineOfPage = this.lines.size();
            }

            multiline_block_merge();
            second_classification secondPass = new second_classification(this.interactive_extraction, this.path);
            secondPass.fonts = (Vector) this.fonts.clone();
            secondPass.lines = (Vector) this.lines.clone();
            secondPass.font_counter = (Vector) this.font_counter.clone();
            secondPass.multiline_blocks = (Vector) this.mlbs.clone();
            secondPass.run();
        } catch (IOException e) {
            System.out.println(e);
        } catch (JDOMException e) {
            // Must precede the generic handler: JDOMException is a subclass of Exception.
            System.out.println(e.getMessage());
            show_failure_dialog();
        } catch (Exception e) {
            System.out.println("Exception in class: first_classification. " + e);
        }
    }

    /**
     * Shows a modal dialog saying that the pdftohtml output could not be parsed and asks
     * whether to retry with pre-debugging. "Yes" runs {@link #debug_pdftohtml_output()} and
     * then closes the dialog; "No" only closes it.
     */
    private void show_failure_dialog() {
        Frame owner = new Frame(GraphicsEnvironment.getLocalGraphicsEnvironment().getDefaultScreenDevice().getDefaultConfiguration());
        final Dialog dialog = new Dialog(owner, "Failure", true);
        Label message = new Label("pdftohtml was unable to return right data.");
        Label question = new Label("Would you like to restart with pre-debugging?");
        dialog.setLayout((LayoutManager) null);
        message.setBounds(60, 50, 300, 20);
        question.setBounds(60, 70, 300, 20);
        dialog.add(message);
        dialog.add(question);
        dialog.setSize(420, 150);

        Button yes = new Button("Yes");
        yes.addActionListener(new ActionListener() {
            public void actionPerformed(ActionEvent actionEvent) {
                first_classification.this.debug_pdftohtml_output();
                dialog.dispose();
            }
        });
        yes.setBounds(180, 100, 60, 20);

        Button no = new Button("No");
        no.addActionListener(new ActionListener() {
            public void actionPerformed(ActionEvent actionEvent) {
                dialog.dispose();
            }
        });
        no.setBounds(250, 100, 60, 20);

        dialog.add(yes);
        dialog.add(no);
        dialog.setLocationRelativeTo((Component) null);
        // Modal: blocks until one of the buttons disposes the dialog.
        dialog.setVisible(true);
        // Release the invisible owner frame as well so it does not keep AWT resources alive.
        owner.dispose();
    }

    /**
     * Compares the horizontal extents {@code [left, left + width]} of two text elements.
     *
     * @param text_Element  the element whose extent is tested
     * @param text_Element2 the element it is tested against
     * @return 1 if the first extent lies within the second, starts within it and ends to its
     *         right, starts to its left and ends within it, or encloses it; 0 otherwise
     */
    public int in_boundaries(Text_Element text_Element, Text_Element text_Element2) {
        final int firstLeft = text_Element.left;
        final int firstRight = text_Element.left + text_Element.width;
        final int secondLeft = text_Element2.left;
        final int secondRight = text_Element2.left + text_Element2.width;

        boolean insideSecond = firstLeft >= secondLeft && firstRight <= secondRight;
        boolean startsInsideSecond = firstLeft >= secondLeft && firstLeft <= secondRight && firstRight > secondRight;
        boolean endsInsideSecond = firstLeft < secondLeft && firstRight >= secondLeft && firstRight <= secondRight;
        boolean enclosesSecond = secondLeft >= firstLeft && secondRight <= firstRight;

        if (insideSecond || startsInsideSecond || endsInsideSecond || enclosesSecond) {
            return 1;
        }
        return 0;
    }

    /**
     * Decides whether a text element vertically belongs to an existing line.
     *
     * <p>The element's vertical extent runs from its {@code top} to {@code top} plus the size
     * of its font (looked up in {@link #fonts} by the element's font index); the line's extent
     * runs from {@code first_top} to {@code bottom}.
     *
     * @param text_Element the element to place
     * @param line         the candidate line
     * @return {@code true} if the element's top or bottom edge lies within the line's extent,
     *         or if the element spans the whole line
     */
    public boolean in_the_line(Text_Element text_Element, Line line) {
        Font font = (Font) this.fonts.elementAt(text_Element.font);
        int elementBottom = text_Element.top + font.size;

        boolean topInside = text_Element.top >= line.first_top && text_Element.top <= line.bottom;
        if (topInside) {
            return true;
        }
        boolean bottomInside = elementBottom >= line.first_top && elementBottom <= line.bottom;
        if (bottomInside) {
            return true;
        }
        return text_Element.top <= line.first_top && elementBottom >= line.bottom;
    }

    /**
     * Classifies how two elements of the same line relate horizontally.
     *
     * @param text_Element  the left-hand element
     * @param text_Element2 its right-hand neighbour
     * @return 1 if the horizontal extents overlap in one of the tested ways; 0 if the second
     *         element has an empty value, or if the gap between the two is no larger than the
     *         average character width ({@code width / value.length()}) of either element;
     *         -1 otherwise
     */
    public int belonging_together(Text_Element text_Element, Text_Element text_Element2) {
        int secondLength = text_Element2.value.length();
        if (secondLength == 0) {
            return 0;
        }
        int secondCharWidth = text_Element2.width / secondLength;

        // An empty first value gets a character width of 0, so any positive gap separates.
        int firstLength = text_Element.value.length();
        int firstCharWidth = firstLength != 0 ? text_Element.width / firstLength : 0;

        int firstLeft = text_Element.left;
        int firstRight = text_Element.left + text_Element.width;
        int secondLeft = text_Element2.left;
        int secondRight = text_Element2.left + text_Element2.width;

        boolean firstInsideSecond = firstLeft > secondLeft && firstRight < secondRight;
        boolean secondInsideFirst = secondLeft > firstLeft && secondRight < firstRight;
        boolean secondEndsInsideFirst = secondRight > firstLeft && secondRight < firstRight;
        boolean secondStartsInsideFirst = secondLeft > firstLeft && secondLeft < firstRight;
        if (firstInsideSecond || secondInsideFirst || secondEndsInsideFirst || secondStartsInsideFirst) {
            return 1;
        }

        int gap = secondLeft - firstRight;
        boolean closeEnough = gap <= secondCharWidth && gap <= firstCharWidth;
        return closeEnough ? 0 : -1;
    }

    /**
     * Widens the bounds of a line so that they include the given element, adds the element's
     * area ({@code width * height}) to the line's used space and makes the element's font the
     * line's font.
     *
     * @param text_Element the element that was added to the line
     * @param line         the line to update
     */
    public void actualize_line_values(Text_Element text_Element, Line line) {
        // Horizontal extent.
        int elementRight = text_Element.left + text_Element.width;
        line.leftmost = Math.min(line.leftmost, text_Element.left);
        line.rightmost = Math.max(elementRight, line.rightmost);

        // Vertical extent; the height is derived from the updated top and bottom.
        line.top = Math.min(line.top, text_Element.top);
        line.bottom = Math.max(line.bottom, text_Element.top + text_Element.height);
        line.height = line.bottom - line.top;
        line.first_top = Math.min(line.first_top, text_Element.top);
        line.last_top = Math.max(line.last_top, text_Element.top);

        line.font = text_Element.font;
        line.used_space += text_Element.width * text_Element.height;
    }

    /**
     * Grows the first element so that it covers the second: its width is stretched to the
     * right edge of the second element and its {@code first_top}/{@code last_top} range is
     * widened to include the second element's range.
     *
     * @param text_Element  the element that absorbs the other one (modified)
     * @param text_Element2 the element being absorbed (not modified)
     */
    public void actualize_text_values(Text_Element text_Element, Text_Element text_Element2) {
        int absorbedRight = text_Element2.left + text_Element2.width;
        text_Element.first_top = Math.min(text_Element2.first_top, text_Element.first_top);
        text_Element.last_top = Math.max(text_Element2.last_top, text_Element.last_top);
        text_Element.width = absorbedRight - text_Element.left;
    }

    /**
     * Initialises the bounds, font and used space of a line from its first element.
     *
     * @param text_Element the first element of the line
     * @param line         the freshly created line to initialise
     */
    public void set_new_line_values(Text_Element text_Element, Line line) {
        // Horizontal extent.
        line.leftmost = text_Element.left;
        line.rightmost = text_Element.left + text_Element.width;

        // Vertical extent.
        line.top = text_Element.top;
        line.first_top = text_Element.top;
        line.last_top = text_Element.top;
        line.bottom = text_Element.top + text_Element.height;
        line.height = line.bottom - line.top;

        line.font = text_Element.font;
        line.used_space = text_Element.width * text_Element.height;
    }

    /**
     * Extends a multiline block by one line: moves its end index forward by one, widens its
     * horizontal bounds, raises its maximum element count if needed and adds the line's used
     * space.
     *
     * @param multiline_Block the block to extend
     * @param line            the line appended to the block
     */
    public void actualize_mlb_values(Multiline_Block multiline_Block, Line line) {
        multiline_Block.end += 1;
        multiline_Block.used_space += line.used_space;
        multiline_Block.max_elements = Math.max(line.texts.size(), multiline_Block.max_elements);
        multiline_Block.rightmost = Math.max(line.rightmost, multiline_Block.rightmost);
        multiline_Block.leftmost = Math.min(line.leftmost, multiline_Block.leftmost);
    }

    /**
     * Folds the statistics of the second block into the first: horizontal bounds are widened,
     * the larger maximum element count wins, used space is summed and the average distance
     * becomes the mean of both averages. The begin/end indices are left untouched.
     *
     * @param multiline_Block  the block that survives the merge (modified)
     * @param multiline_Block2 the block being merged in (not modified)
     */
    public void actualize_mlb_values2(Multiline_Block multiline_Block, Multiline_Block multiline_Block2) {
        multiline_Block.avg_distance = (multiline_Block.avg_distance + multiline_Block2.avg_distance) / 2;
        multiline_Block.used_space += multiline_Block2.used_space;
        multiline_Block.max_elements = Math.max(multiline_Block2.max_elements, multiline_Block.max_elements);
        multiline_Block.rightmost = Math.max(multiline_Block2.rightmost, multiline_Block.rightmost);
        multiline_Block.leftmost = Math.min(multiline_Block2.leftmost, multiline_Block.leftmost);
    }

    /**
     * Initialises a multiline block that consists of a single line.
     *
     * @param multiline_Block the freshly created block to initialise
     * @param line            the first line of the block
     * @param i               index of that line; becomes both begin and end of the block
     * @param i2              page number stored in the block
     */
    public void set_mlb_values(Multiline_Block multiline_Block, Line line, int i, int i2) {
        multiline_Block.page = i2;
        multiline_Block.begin = i;
        multiline_Block.end = i;

        multiline_Block.leftmost = line.leftmost;
        multiline_Block.rightmost = line.rightmost;

        multiline_Block.max_elements = line.texts.size();
        multiline_Block.used_space = line.used_space;
        multiline_Block.avg_distance = 0;
    }

    /**
     * Converts a pdftohtml {@code text} element into a {@code Text_Element}.
     *
     * <p>The trimmed text becomes the value; {@code top}, {@code left}, {@code width},
     * {@code height} and {@code font} are read as integer attributes. The type is
     * {@code "number"} when the value passes both {@code Integer.parseInt} and
     * {@code Float.parseFloat}, otherwise {@code "text"}. The format is {@code "bolditalic"},
     * {@code "bold"}, {@code "italic"} or empty, depending on which of the direct child
     * elements {@code b} and {@code i} are present.
     *
     * @param element the XML {@code text} element
     * @return the populated text element
     * @throws NumberFormatException          if one of the attributes is not an integer
     * @throws ArrayIndexOutOfBoundsException if the font index is not a valid index into
     *                                        {@link #fonts}
     */
    public Text_Element getTextElement(Element element) {
        String value = element.getValue().trim();
        int top = Integer.parseInt(element.getAttribute("top").getValue());
        int left = Integer.parseInt(element.getAttribute("left").getValue());
        int width = Integer.parseInt(element.getAttribute("width").getValue());
        int height = Integer.parseInt(element.getAttribute("height").getValue());
        int font = Integer.parseInt(element.getAttribute("font").getValue());

        // Fail fast on a font index that has no registered font; the value itself is unused.
        this.fonts.elementAt(font);

        // NOTE(review): because the integer parse comes first, decimal values such as "3.14"
        // are classified as "text" - confirm that this is intended.
        String typ = "number";
        try {
            Integer.parseInt(value);
            Float.parseFloat(value);
        } catch (NumberFormatException e) {
            typ = "text";
        }

        // The combined case has to be tested first; tested last it could never be reached.
        boolean bold = !element.getChildren("b").isEmpty();
        boolean italic = !element.getChildren("i").isEmpty();
        String format = "";
        if (bold && italic) {
            format = "bolditalic";
        } else if (bold) {
            format = "bold";
        } else if (italic) {
            format = "italic";
        }
        return new Text_Element(value, top, left, width, height, font, format, typ);
    }

    /**
     * Post-processes the detected multiline blocks.
     *
     * <p>For every block the lines bordering it are offered to {@link #line_merge}, which may
     * fold them into the block's first or last line and remove them from {@code lines}.
     * Neighbouring blocks on the same page that are at most three lines apart and whose
     * {@code max_elements} differ by at most one are then merged into a single block.
     * The counters {@code removed_elements_before}/{@code removed_elements_after} are used to
     * shift block indices after lines were removed.
     */
    public void multiline_block_merge() {
        this.removed_elements_before = 0;
        this.removed_elements_after = 0;
        int i = 0;
        while (i < this.mlbs.size()) {
            Multiline_Block multiline_Block = (Multiline_Block) this.mlbs.elementAt(i);
            // Shift the block by the number of lines removed so far.
            multiline_Block.begin = (multiline_Block.begin - this.removed_elements_before) - this.removed_elements_after;
            multiline_Block.end = (multiline_Block.end - this.removed_elements_before) - this.removed_elements_after;
            // Counter snapshots: the differences below give the removals caused by this block.
            int i2 = this.removed_elements_before;
            int i3 = this.removed_elements_after;
            if (i == 0) {
                // First block: only lines before it are considered, at most 10 of them.
                line_merge(i, multiline_Block.begin - 10 > 0 ? 10 : multiline_Block.begin - 1, 0);
                multiline_Block.begin -= this.removed_elements_before - i2;
                multiline_Block.end -= this.removed_elements_before - i2;
            } else if (i == this.mlbs.size() - 1) {
                // Last block: only lines after it are considered, at most 10 of them.
                line_merge(i, 0, multiline_Block.end + 10 < this.lines.size() ? 10 : (this.lines.size() - multiline_Block.end) - 1);
            } else {
                Multiline_Block multiline_Block2 = (Multiline_Block) this.mlbs.elementAt(i - 1);
                Multiline_Block multiline_Block3 = (Multiline_Block) this.mlbs.elementAt(i + 1);
                // Number of lines between this block and its successor / predecessor.
                // NOTE(review): the successor's indices have not been shifted by the removal
                // counters at this point while this block's have - confirm the gap is correct.
                int i4 = (multiline_Block3.begin - multiline_Block.end) - 1;
                int i5 = (multiline_Block.begin - multiline_Block2.end) - 1;
                // Lines are only merged across a gap whose neighbouring block is on the same page.
                if (multiline_Block.page == multiline_Block3.page && multiline_Block.page != multiline_Block2.page) {
                    line_merge(i, 0, i4);
                } else if (multiline_Block.page == multiline_Block2.page && multiline_Block.page != multiline_Block3.page) {
                    line_merge(i, i5, 0);
                } else if (multiline_Block.page == multiline_Block2.page && multiline_Block.page == multiline_Block3.page) {
                    line_merge(i, i5, i4);
                }
                // NOTE(review): unlike the first-block branch, begin/end are not shifted here
                // right after line_merge removed preceding lines - confirm this is intended.
                boolean z = false;
                // Merge this block into its predecessor when both are close and similar.
                if (multiline_Block.begin - multiline_Block2.end <= 3 && multiline_Block.page == multiline_Block2.page && Math.abs(multiline_Block.max_elements - multiline_Block2.max_elements) <= 1) {
                    multiline_Block2.end = multiline_Block.end - (this.removed_elements_before - i2);
                    this.mlbs.removeElementAt(i);
                    z = true;
                    actualize_mlb_values2(multiline_Block2, multiline_Block);
                    // The predecessor now occupies this position's role; step back so that the
                    // increment at the end of the loop lands on the next unprocessed block.
                    i--;
                }
                // Merge the successor as well when it is close and similar.
                if (multiline_Block3.begin - multiline_Block.end <= 3 && multiline_Block3.page == multiline_Block.page && Math.abs(multiline_Block.max_elements - multiline_Block3.max_elements) <= 1) {
                    if (z) {
                        // This block was already folded into the predecessor, so the successor
                        // is folded into the predecessor too.
                        multiline_Block2.end = (multiline_Block3.end - (this.removed_elements_before - i2)) - (this.removed_elements_after - i3);
                        actualize_mlb_values2(multiline_Block2, multiline_Block3);
                        this.mlbs.removeElementAt(i + 1);
                    } else {
                        multiline_Block.begin -= this.removed_elements_before - i2;
                        multiline_Block.end = (multiline_Block3.end - (this.removed_elements_before - i2)) - (this.removed_elements_after - i3);
                        actualize_mlb_values2(multiline_Block, multiline_Block3);
                        this.mlbs.removeElementAt(i + 1);
                    }
                }
            }
            i++;
        }
    }

    /**
     * Tries to fold lines bordering a multiline block into the block's first or last line.
     *
     * <p>A bordering line is folded in when the number of element matches found equals the
     * number of its elements. Two elements match when the vertical gap between the lines is
     * smaller than half the block element's height, both are of type {@code "text"} and their
     * left or right edges are less than 3 units apart. A folded line is removed from
     * {@code lines} and counted in {@code removed_elements_before} or
     * {@code removed_elements_after}. Each direction stops at the first line that is not folded.
     *
     * @param i  index of the block in {@code mlbs}
     * @param i2 number of lines before the block's first line to try
     * @param i3 number of lines after the block's last line to try
     */
    public void line_merge(int i, int i2, int i3) {
        Multiline_Block multiline_Block = (Multiline_Block) this.mlbs.elementAt(i);
        // First and last line of the block.
        Line line = (Line) this.lines.elementAt(multiline_Block.begin);
        Line line2 = (Line) this.lines.elementAt(multiline_Block.end);
        // Number of matches found for the bordering line currently examined.
        int i4 = 0;
        boolean z = true;
        // Walk upwards from the block's first line.
        for (int i5 = 1; i5 <= i2 && z; i5++) {
            Line line3 = (Line) this.lines.elementAt(multiline_Block.begin - i5);
            // NOTE(review): clone() copies the Vector only; the Text_Element objects are shared
            // with line.texts, so the value changes below are visible even when the merge is
            // not committed afterwards - confirm this is intended.
            Vector vector = (Vector) line.texts.clone();
            int i6 = line.first_top - line3.bottom;
            for (int i7 = 0; i7 < line.texts.size(); i7++) {
                Text_Element text_Element = (Text_Element) vector.elementAt(i7);
                for (int i8 = 0; i8 < line3.texts.size(); i8++) {
                    Text_Element text_Element2 = (Text_Element) line3.texts.elementAt(i8);
                    int abs = Math.abs(text_Element2.left - text_Element.left);
                    int abs2 = Math.abs((text_Element2.left + text_Element2.width) - (text_Element.left + text_Element.width));
                    if (i6 < text_Element.height / 2 && text_Element.typ.equals(text_Element2.typ) && text_Element.typ.equals("text") && (abs < 3 || abs2 < 3)) {
                        // The text of the line above is put in front of the block element's text.
                        text_Element.value = new StringBuffer().append(text_Element2.value).append(" ").append(text_Element.value).toString();
                        text_Element.count_lines++;
                        actualize_text_values(text_Element, text_Element2);
                        i4++;
                    }
                }
            }
            if (i4 == line3.texts.size()) {
                // As many matches as elements: fold the line in and drop it.
                line.texts = (Vector) vector.clone();
                for (int i9 = 0; i9 < line.texts.size(); i9++) {
                    actualize_line_values((Text_Element) line.texts.elementAt(i9), line);
                }
                this.lines.removeElementAt(multiline_Block.begin - i5);
                this.removed_elements_before++;
            } else {
                z = false;
            }
            i4 = 0;
        }
        boolean z2 = true;
        // Walk downwards from the block's last line.
        for (int i10 = 1; i10 <= i3 && z2; i10++) {
            // NOTE(review): after a removal at end + i10 the following lines move up by one, but
            // i10 still advances, so the next iteration appears to skip a line and could index
            // past the end of the list - verify.
            Line line4 = (Line) this.lines.elementAt(multiline_Block.end + i10);
            Vector vector2 = (Vector) line2.texts.clone();
            int i11 = line4.first_top - line2.bottom;
            for (int i12 = 0; i12 < line2.texts.size(); i12++) {
                Text_Element text_Element3 = (Text_Element) line2.texts.elementAt(i12);
                for (int i13 = 0; i13 < line4.texts.size(); i13++) {
                    Text_Element text_Element4 = (Text_Element) line4.texts.elementAt(i13);
                    int abs3 = Math.abs(text_Element4.left - text_Element3.left);
                    int abs4 = Math.abs((text_Element4.left + text_Element4.width) - (text_Element3.left + text_Element3.width));
                    if (i11 < text_Element3.height / 2 && text_Element3.typ.equals(text_Element4.typ) && text_Element3.typ.equals("text") && (abs3 < 3 || abs4 < 3)) {
                        // The text of the line below is appended to the block element's text.
                        text_Element3.value = new StringBuffer().append(text_Element3.value).append(" ").append(text_Element4.value).toString();
                        text_Element3.count_lines++;
                        actualize_text_values(text_Element3, text_Element4);
                        i4++;
                    }
                }
            }
            if (i4 == line4.texts.size()) {
                // As many matches as elements: fold the line in and drop it.
                line2.texts = (Vector) vector2.clone();
                for (int i14 = 0; i14 < line2.texts.size(); i14++) {
                    actualize_line_values((Text_Element) line2.texts.elementAt(i14), line2);
                }
                this.lines.removeElementAt(multiline_Block.end + i10);
                this.removed_elements_after++;
            } else {
                z2 = false;
            }
            i4 = 0;
        }
    }

    /**
     * Writes a repaired copy of the pdftohtml output and runs the classification on it.
     *
     * <p>The copy is written to {@code path + "/debugged_output.xml"}. In every line the
     * upper-case markup {@code A href}, {@code <B>}, {@code </B>}, {@code <I>} and
     * {@code </I>} is replaced by its lower-case form. Both files are closed before
     * {@link #run(String)} is called on the copy. Any exception is printed to standard output.
     *
     * <p>If the current input already is the debugged copy, nothing is done: opening the
     * output would truncate the very file that is being read.
     */
    public void debug_pdftohtml_output() {
        String debuggedFileName = this.path + "/" + "debugged_output.xml";
        try {
            File source = new File(this.pdftohtml_file_name);
            File target = new File(debuggedFileName);
            if (source.getCanonicalFile().equals(target.getCanonicalFile())) {
                System.out.println("pdftohtml output has already been pre-debugged: " + debuggedFileName);
                return;
            }

            BufferedReader reader = null;
            PrintStream writer = null;
            try {
                reader = new BufferedReader(new FileReader(source));
                writer = new PrintStream(new FileOutputStream(target));
                for (String readLine = reader.readLine(); readLine != null; readLine = reader.readLine()) {
                    writer.println(readLine.replaceAll("A href", "a href").replaceAll("<B>", "<b>").replaceAll("<I>", "<i>").replaceAll("</I>", "</i>").replaceAll("</B>", "</b>"));
                }
            } finally {
                // Release both file handles before the copy is parsed again.
                if (writer != null) {
                    writer.close();
                }
                if (reader != null) {
                    reader.close();
                }
            }
            run(debuggedFileName);
        } catch (Exception e) {
            System.out.println(e);
        }
    }
}
