Search Table Row from a Pdf file and Display Result at Desktop App

We have a PDF file that contains a course exam routine laid out as a table. Our task is to search the entire PDF file by course number and display the whole matching table row in a desktop application. The PDF file looks like the one below:

Solution is here

GetLinesFromPDF.java

[code lang="java"]
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
*
* @author Mohammad Rajaul Islam
*
*/
/**
 * Extracts the text of a PDF chunk by chunk and looks up one row of the table it contains.
 *
 * <p>The class plugs into PDFBox by overriding {@link #writeString(String, List)}: every string
 * PDFBox hands to that method is appended to {@link #lines}. {@link #getSearchResult(String)}
 * then scans the collected strings for the search key and reads the strings that follow it as
 * the remaining columns of the row.
 */
public class GetLinesFromPDF extends PDFTextStripper {

    /**
     * Strings collected by {@link #writeString(String, List)} during the most recent extraction.
     * The list is shared (static), so it is emptied at the start of every search.
     */
    static List<String> lines = new ArrayList<String>();

    /** Map keys of the columns that follow the course number, in the order they are read. */
    private static final String[] FOLLOWING_FIELDS = {"section", "title", "examDate", "time"};

    /**
     * Creates a stripper with the default PDFBox settings.
     *
     * @throws IOException if the superclass constructor fails
     */
    public GetLinesFromPDF() throws IOException {
    }

    /**
     * Searches {@code test.pdf} (resolved against the working directory) for the first extracted
     * string containing {@code searchKey} and returns that row of the table.
     *
     * @param searchKey text to look for, e.g. a course number; must not be {@code null}
     * @return a map with the key {@code courseNumber} and, as far as the extracted text provides
     *         them, {@code section}, {@code title}, {@code examDate}, {@code time} and
     *         {@code note}; {@code null} when no extracted string contains {@code searchKey}
     * @throws IOException if the PDF cannot be loaded or its text cannot be extracted
     * @throws NullPointerException if {@code searchKey} is {@code null}
     */
    public static Map<String, String> getSearchResult(String searchKey) throws IOException {
        if (searchKey == null) {
            throw new NullPointerException("searchKey must not be null");
        }

        // there is a file test.pdf at the root of the project
        String fileName = "test.pdf";

        // Start from an empty list: without this every call would append the whole document
        // again, so memory use and scan time would grow with each search.
        lines.clear();

        try (PDDocument document = PDDocument.load(new File(fileName));
                Writer dummy = new OutputStreamWriter(new ByteArrayOutputStream())) {
            PDFTextStripper stripper = new GetLinesFromPDF();
            stripper.setSortByPosition(true);
            // PDFBox page numbers are 1-based.
            stripper.setStartPage(1);
            stripper.setEndPage(document.getNumberOfPages());
            // The writer only absorbs whatever PDFBox itself emits; the text we need is
            // captured through writeString().
            stripper.writeText(document, dummy);
        }

        return findRow(lines, searchKey);
    }

    /**
     * Linear scan for the row that starts with the first string containing {@code searchKey}.
     *
     * @param extracted strings in extraction order
     * @param searchKey text identifying the row
     * @return the row as a map, or {@code null} when nothing matches
     */
    private static Map<String, String> findRow(List<String> extracted, String searchKey) {
        Map<String, String> row = null;
        // 0 = key not seen yet; 1..4 = index (1-based) of the next column in FOLLOWING_FIELDS;
        // 5 = the next string is either the note or already belongs to the following row.
        int nextField = 0;

        for (String line : extracted) {
            if (line.contains(searchKey)) {
                row = new HashMap<>();
                row.put("courseNumber", line);
                // Restart the column counter so the strings after this match are read as
                // section, title, ... even when the key had already matched earlier.
                nextField = 1;
                continue;
            }

            if (nextField == 0) {
                continue;
            }

            if (nextField <= FOLLOWING_FIELDS.length) {
                row.put(FOLLOWING_FIELDS[nextField - 1], line);
                nextField++;
                continue;
            }

            // A string whose sixth character is a digit is treated as the start of the next
            // row (presumably a course number such as "AEBI 122" - confirm against the PDF),
            // which means this row has no note. Anything else, including strings shorter than
            // six characters, is taken as the note.
            boolean startsNextRow = line.length() > 5 && Character.isDigit(line.charAt(5));
            if (!startsNextRow) {
                row.put("note", line);
            }
            break;
        }

        return row;
    }

    /**
     * Overrides the default functionality of PDFTextStripper.writeString(): instead of writing
     * the string to the output, it is stored in {@link #lines}.
     */
    @Override
    protected void writeString(String str, List<TextPosition> textPositions) throws IOException {
        lines.add(str);
        // you may process the line here itself, as and when it is obtained
    }
}

[/code]

PdfTableSearch.java

[code lang="java"]
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import javax.swing.JOptionPane;

/**
*
* @author Mohammad Rajaul Islam
*/
public class PdfTableSearch extends javax.swing.JFrame {

/**
* Creates new form PdfTableSearch
*/
public PdfTableSearch() {
initComponents();
hide();

}

/**
* This method is called from within the constructor to initialize the form.
* WARNING: Do NOT modify this code. The content of this method is always
* regenerated by the Form Editor.
*/
@SuppressWarnings("unchecked")
// <editor-fold defaultstate="collapsed" desc="Generated Code">
private void initComponents() {

jLabel1 = new javax.swing.JLabel();
txtSearchKey = new javax.swing.JTextField();
btnSearch = new javax.swing.JButton();
jLabel2 = new javax.swing.JLabel();
lblCourseNumber = new javax.swing.JLabel();
lblSection = new javax.swing.JLabel();
lblTitle = new javax.swing.JLabel();
lblExamDate = new javax.swing.JLabel();
lblTime = new javax.swing.JLabel();
displayTime = new javax.swing.JLabel();
displayTitle = new javax.swing.JLabel();
displayExamDate = new javax.swing.JLabel();
displayCourseNumber = new javax.swing.JLabel();
displaySection = new javax.swing.JLabel();
lblNote = new javax.swing.JLabel();
displayNote = new javax.swing.JLabel();
lblExecutionTime = new javax.swing.JLabel();

setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);

jLabel1.setFont(new java.awt.Font("Tahoma", 0, 48)); // NOI18N
jLabel1.setHorizontalAlignment(javax.swing.SwingConstants.CENTER);
jLabel1.setText("Pdf Table Search");

txtSearchKey.setFont(new java.awt.Font("Tahoma", 1, 18)); // NOI18N
txtSearchKey.setText("AEBI 122");

btnSearch.setFont(new java.awt.Font("Tahoma", 0, 18)); // NOI18N
btnSearch.setText("Search Pdf");
btnSearch.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
btnSearchActionPerformed(evt);
}
});

jLabel2.setFont(new java.awt.Font("Tahoma", 0, 24)); // NOI18N
jLabel2.setText("Search Result(Time: ms):");

lblCourseNumber.setFont(new java.awt.Font("Tahoma", 1, 18)); // NOI18N
lblCourseNumber.setText("Course Number");

lblSection.setFont(new java.awt.Font("Tahoma", 1, 18)); // NOI18N
lblSection.setText("Section");

lblTitle.setFont(new java.awt.Font("Tahoma", 1, 18)); // NOI18N
lblTitle.setText("Title");

lblExamDate.setFont(new java.awt.Font("Tahoma", 1, 18)); // NOI18N
lblExamDate.setText("Exam Date");

lblTime.setFont(new java.awt.Font("Tahoma", 1, 18)); // NOI18N
lblTime.setText("Time");

displayTime.setFont(new java.awt.Font("Tahoma", 0, 18)); // NOI18N

displayTitle.setFont(new java.awt.Font("Tahoma", 0, 18)); // NOI18N

displayExamDate.setFont(new java.awt.Font("Tahoma", 0, 18)); // NOI18N

displayCourseNumber.setFont(new java.awt.Font("Tahoma", 0, 18)); // NOI18N

displaySection.setFont(new java.awt.Font("Tahoma", 0, 18)); // NOI18N

lblNote.setFont(new java.awt.Font("Tahoma", 1, 18)); // NOI18N
lblNote.setText("Note");

displayNote.setFont(new java.awt.Font("Tahoma", 0, 18)); // NOI18N

javax.swing.GroupLayout layout = new javax.swing.GroupLayout(getContentPane());
getContentPane().setLayout(layout);
layout.setHorizontalGroup(
layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addGroup(layout.createSequentialGroup()
.addContainerGap()
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addComponent(jLabel1, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
.addGroup(layout.createSequentialGroup()
.addComponent(txtSearchKey, javax.swing.GroupLayout.PREFERRED_SIZE, 600, javax.swing.GroupLayout.PREFERRED_SIZE)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
.addComponent(btnSearch, javax.swing.GroupLayout.PREFERRED_SIZE, 145, javax.swing.GroupLayout.PREFERRED_SIZE)
.addGap(0, 43, Short.MAX_VALUE)))
.addContainerGap())
.addGroup(layout.createSequentialGroup()
.addGap(63, 63, 63)
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addGroup(layout.createSequentialGroup()
.addComponent(jLabel2)
.addGap(18, 18, 18)
.addComponent(lblExecutionTime, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
.addContainerGap())
.addGroup(layout.createSequentialGroup()
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.TRAILING, false)
.addComponent(lblTime, javax.swing.GroupLayout.Alignment.LEADING, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
.addComponent(lblExamDate, javax.swing.GroupLayout.Alignment.LEADING, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
.addComponent(lblSection, javax.swing.GroupLayout.Alignment.LEADING, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
.addComponent(lblCourseNumber, javax.swing.GroupLayout.Alignment.LEADING, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
.addComponent(lblTitle, javax.swing.GroupLayout.Alignment.LEADING, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
.addComponent(lblNote, javax.swing.GroupLayout.Alignment.LEADING, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE))
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addGroup(layout.createSequentialGroup()
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addComponent(displayTitle, javax.swing.GroupLayout.Alignment.TRAILING, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
.addComponent(displayExamDate, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
.addComponent(displayCourseNumber, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
.addComponent(displaySection, javax.swing.GroupLayout.Alignment.TRAILING, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)))
.addGroup(layout.createSequentialGroup()
.addGap(16, 16, 16)
.addComponent(displayTime, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE))
.addGroup(layout.createSequentialGroup()
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
.addComponent(displayNote, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE))))))
);
layout.setVerticalGroup(
layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addGroup(layout.createSequentialGroup()
.addGap(20, 20, 20)
.addComponent(jLabel1)
.addGap(29, 29, 29)
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING, false)
.addComponent(txtSearchKey)
.addComponent(btnSearch, javax.swing.GroupLayout.DEFAULT_SIZE, 43, Short.MAX_VALUE))
.addGap(18, 18, 18)
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addComponent(jLabel2)
.addComponent(lblExecutionTime, javax.swing.GroupLayout.PREFERRED_SIZE, 40, javax.swing.GroupLayout.PREFERRED_SIZE))
.addGap(38, 38, 38)
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
.addComponent(lblCourseNumber, javax.swing.GroupLayout.PREFERRED_SIZE, 36, javax.swing.GroupLayout.PREFERRED_SIZE)
.addComponent(displayCourseNumber, javax.swing.GroupLayout.PREFERRED_SIZE, 34, javax.swing.GroupLayout.PREFERRED_SIZE))
.addGap(18, 18, 18)
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
.addComponent(lblSection, javax.swing.GroupLayout.PREFERRED_SIZE, 32, javax.swing.GroupLayout.PREFERRED_SIZE)
.addComponent(displaySection, javax.swing.GroupLayout.PREFERRED_SIZE, 34, javax.swing.GroupLayout.PREFERRED_SIZE))
.addGap(18, 18, 18)
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.BASELINE)
.addComponent(lblTitle, javax.swing.GroupLayout.PREFERRED_SIZE, 43, javax.swing.GroupLayout.PREFERRED_SIZE)
.addComponent(displayTitle, javax.swing.GroupLayout.PREFERRED_SIZE, 34, javax.swing.GroupLayout.PREFERRED_SIZE))
.addGap(18, 18, 18)
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addComponent(lblExamDate, javax.swing.GroupLayout.PREFERRED_SIZE, 40, javax.swing.GroupLayout.PREFERRED_SIZE)
.addComponent(displayExamDate, javax.swing.GroupLayout.PREFERRED_SIZE, 34, javax.swing.GroupLayout.PREFERRED_SIZE))
.addGap(18, 18, 18)
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addComponent(lblTime, javax.swing.GroupLayout.PREFERRED_SIZE, 34, javax.swing.GroupLayout.PREFERRED_SIZE)
.addComponent(displayTime, javax.swing.GroupLayout.PREFERRED_SIZE, 34, javax.swing.GroupLayout.PREFERRED_SIZE))
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addComponent(lblNote, javax.swing.GroupLayout.PREFERRED_SIZE, 34, javax.swing.GroupLayout.PREFERRED_SIZE)
.addComponent(displayNote, javax.swing.GroupLayout.PREFERRED_SIZE, 34, javax.swing.GroupLayout.PREFERRED_SIZE))
.addContainerGap(91, Short.MAX_VALUE))
);

pack();
}// </editor-fold>

private void btnSearchActionPerformed(java.awt.event.ActionEvent evt) {
long startTime = System.currentTimeMillis();
String key = txtSearchKey.getText().trim();
if (key == null || key.isBlank() || key.isEmpty()) {
JOptionPane.showMessageDialog(null, "Enter a Course Number");
} else {
Map<String, String> maps = new HashMap();
try {
maps = GetLinesFromPDF.getSearchResult(key);
hide();
for (Map.Entry<String, String> s : maps.entrySet()) {
if (s.getKey().equalsIgnoreCase("courseNumber")) {
displayCourseNumber.setText(String.valueOf(s.getValue()));
}
if (s.getKey().equalsIgnoreCase("section")) {
displaySection.setText(String.valueOf(s.getValue()));
}
if (s.getKey().equalsIgnoreCase("title")) {
displayTitle.setText(String.valueOf(s.getValue()));
}
if (s.getKey().equalsIgnoreCase("examDate")) {
displayExamDate.setText(String.valueOf(s.getValue()));
}
if (s.getKey().equalsIgnoreCase("time")) {
displayTime.setText(String.valueOf(s.getValue()));
}

if (!s.getKey().isBlank() && s.getKey().equalsIgnoreCase("note")) {

displayNote.setText(String.valueOf(s.getValue()));
lblNote.setVisible(true);
displayNote.setVisible(true);
}
if (s.getKey().isBlank() && s.getKey().equalsIgnoreCase("note")) {
displayNote.setText(null);
lblNote.setVisible(false);
displayNote.setVisible(false);
}

}
display();
} catch (IOException ex) {
JOptionPane.showMessageDialog(null, "Something Wrong!!!");
} finally {
maps = new HashMap<>();
}
long endTime = System.currentTimeMillis();
lblExecutionTime.setText(String.valueOf(endTime – startTime));
}

}

public void hide() {
lblCourseNumber.setVisible(false);

lblSection.setVisible(false);
lblTitle.setVisible(false);
lblExamDate.setVisible(false);
lblTime.setVisible(false);
lblNote.setVisible(false);
displayNote.setText(null);
}

public void display() {
lblCourseNumber.setVisible(true);
lblSection.setVisible(true);
lblTitle.setVisible(true);
lblExamDate.setVisible(true);
lblTime.setVisible(true);

}

/**
* @param args the command line arguments
*/
public static void main(String args[]) {
/* Set the Nimbus look and feel */
//<editor-fold defaultstate="collapsed" desc=" Look and feel setting code (optional) ">
/* If Nimbus (introduced in Java SE 6) is not available, stay with the default look and feel.
* For details see http://download.oracle.com/javase/tutorial/uiswing/lookandfeel/plaf.html
*/
try {
for (javax.swing.UIManager.LookAndFeelInfo info : javax.swing.UIManager.getInstalledLookAndFeels()) {
if ("Nimbus".equals(info.getName())) {
javax.swing.UIManager.setLookAndFeel(info.getClassName());
break;
}
}
} catch (ClassNotFoundException ex) {
java.util.logging.Logger.getLogger(PdfTableSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex);
} catch (InstantiationException ex) {
java.util.logging.Logger.getLogger(PdfTableSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex);
} catch (IllegalAccessException ex) {
java.util.logging.Logger.getLogger(PdfTableSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex);
} catch (javax.swing.UnsupportedLookAndFeelException ex) {
java.util.logging.Logger.getLogger(PdfTableSearch.class.getName()).log(java.util.logging.Level.SEVERE, null, ex);
}
//</editor-fold>
//</editor-fold>

/* Create and display the form */
java.awt.EventQueue.invokeLater(new Runnable() {
public void run() {
new PdfTableSearch().setVisible(true);
}
});
}

// Variables declaration – do not modify
private javax.swing.JButton btnSearch;
private javax.swing.JLabel displayCourseNumber;
private javax.swing.JLabel displayExamDate;
private javax.swing.JLabel displayNote;
private javax.swing.JLabel displaySection;
private javax.swing.JLabel displayTime;
private javax.swing.JLabel displayTitle;
private javax.swing.JLabel jLabel1;
private javax.swing.JLabel jLabel2;
private javax.swing.JLabel lblCourseNumber;
private javax.swing.JLabel lblExamDate;
private javax.swing.JLabel lblExecutionTime;
private javax.swing.JLabel lblNote;
private javax.swing.JLabel lblSection;
private javax.swing.JLabel lblTime;
private javax.swing.JLabel lblTitle;
private javax.swing.JTextField txtSearchKey;
// End of variables declaration
}

[/code]

Best Of Luck!!!

One thought on “Search Table Row from a Pdf file and Display Result at Desktop App”

Leave a Reply