|
|
@@ -0,0 +1,43 @@
|
|
|
+using iText.Kernel.Pdf;
|
|
|
+using iText.Kernel.Pdf.Canvas.Parser;
|
|
|
+using iText.Kernel.Pdf.Canvas.Parser.Listener;
|
|
|
+using System;
|
|
|
+using System.Text;
|
|
|
+using System.Text.RegularExpressions;
|
|
|
+
|
|
|
namespace GreenTree.Firefly.Importer
{
    /// <summary>
    /// Console entry point that extracts the text of a PDF file with iText and
    /// splits it into chunks at every occurrence of two consecutive
    /// <c>dd.dd.</c> tokens.
    /// </summary>
    /// <remarks>
    /// NOTE(review): judging by the default file name ("Kontoauszug") the input is
    /// presumably a bank statement and the two tokens are booking/value dates -
    /// confirm against a real document.
    /// </remarks>
    class Program
    {
        /// <summary>
        /// File that is processed when no path is passed on the command line.
        /// Written as a verbatim string, so backslashes must NOT be doubled.
        /// </summary>
        private const string DefaultFile =
            @"D:\Downloads\915733800_2021_Nr.004_Kontoauszug_vom_30.04.2021_20210521024123.pdf";

        /// <summary>
        /// Separator between entries: two "dd.dd." tokens, each followed by one
        /// whitespace character. Built once instead of on every call.
        /// </summary>
        private static readonly Regex EntrySeparator =
            new Regex(@"\d\d\.\d\d\.\s\d\d\.\d\d\.\s");

        /// <summary>
        /// Reads the PDF, concatenates the text of all pages and splits it into entries.
        /// </summary>
        /// <param name="args">
        /// Optional command-line arguments. When present, <c>args[0]</c> is used as the
        /// path of the PDF to read; otherwise <see cref="DefaultFile"/> is used.
        /// </param>
        static void Main(string[] args)
        {
            var file = (args != null && args.Length > 0) ? args[0] : DefaultFile;

            var allText = ExtractText(file);

            // The split result is not consumed yet; it is kept so the parsing step
            // stays in place for whatever processes the entries next.
            var entries = EntrySeparator.Split(allText);
        }

        /// <summary>
        /// Extracts the text of every page of the given PDF, in page order.
        /// </summary>
        /// <param name="file">Path of the PDF file to read.</param>
        /// <returns>The text of all pages, each page terminated by a line break.</returns>
        private static string ExtractText(string file)
        {
            var resultText = new StringBuilder();

            // Open the document exactly once; the using statements release the file
            // handle even when extraction throws half-way through.
            using (var reader = new PdfReader(file))
            using (var pdfDocument = new PdfDocument(reader))
            {
                var pages = pdfDocument.GetNumberOfPages();

                // iText page numbers are 1-based.
                for (int i = 1; i <= pages; i++)
                {
                    // A fresh strategy per page so text collected for one page
                    // is not carried over into the next one.
                    var strategy = new LocationTextExtractionStrategy();
                    var pageText = PdfTextExtractor.GetTextFromPage(pdfDocument.GetPage(i), strategy);

                    // Terminate each page with a line break so the last line of one
                    // page does not run into the first line of the following page.
                    resultText.AppendLine(pageText);
                }
            }

            return resultText.ToString();
        }
    }
}
|