Document Processors
Specialized parsing tools for all types of documents.
public interface DocumentProcessor {
/**
 * Converts the given file into a list of {@code Document} objects.
 *
 * @param file   the file to process
 * @param tenant tenant identifier; the {@code PerformanceReviewProcessor} implementation in this
 *               file stores it in each document's metadata
 * @param userId user identifier; the {@code PerformanceReviewProcessor} implementation in this
 *               file stores it in each document's metadata
 * @return the documents produced from {@code file}
 * @throws Exception if an implementation chooses to propagate a processing failure
 */
public List<Document> processDocument(Path file, String tenant, String userId) throws Exception;
/**
 * Reports whether this processor is able to handle the given file.
 *
 * @param file                the candidate file
 * @param othersHaveProcessed NOTE(review): presumably tells the processor whether another
 *                            processor has already handled this file - confirm against the caller
 * @return {@code true} if this processor can process {@code file}
 */
public boolean canProcess(Path file, boolean othersHaveProcessed);
}@Component
@Order(0) // Run first
public class PerformanceReviewProcessor implements DocumentProcessor {
/**
 * Reads {@code file} in full, splits its text on page headers matching the regex
 * {@code Page \d+:\n===}, and wraps each non-blank page in a {@code Document}.
 *
 * <p>Every document carries the metadata keys {@code tenant}, {@code user},
 * {@code absolute_directory_path}, {@code file_name} and {@code page}. Pages are numbered from 1
 * in the order they appear. The empty chunk that {@code String.split} yields in front of a header
 * at the very start of the file is not counted as a page; a blank page further down keeps its
 * number but produces no document. Non-blank text that precedes the first header is still
 * emitted as page 1.
 *
 * <p>Any failure is logged and not rethrown; whatever was collected before the failure (possibly
 * nothing) is returned.
 *
 * @param file   the file to read; opened with {@code Files.newBufferedReader(Path)}
 * @param tenant value stored under the {@code tenant} metadata key
 * @param userId value stored under the {@code user} metadata key
 * @return the documents built from the file's pages; never {@code null}
 */
@Override
public List<Document> processDocument(Path file, String tenant, String userId) {
    List<Document> documents = new ArrayList<>();
    try (BufferedReader reader = Files.newBufferedReader(file)) {
        // Load the entire file so we can then work on splitting it. readLine() strips the line
        // terminator, so each line is re-joined with "\n", which the header regex relies on.
        StringBuilder wholeFile = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            wholeFile.append(line).append("\n");
        }
        String[] pages = wholeFile.toString().split("Page \\d+:\n===");

        // Resolve to an absolute path BEFORE asking for the parent: a bare relative name such as
        // "q3-review-export-1.txt" has a null parent, which previously ended in a swallowed NPE.
        String directoryPath = file.toAbsolutePath().getParent().toString();
        String fileName = file.getFileName().toString();

        int pageNumber = 0;
        for (int i = 0; i < pages.length; i++) {
            String page = pages[i];
            boolean blank = page.trim().isEmpty();
            if (i == 0 && blank) {
                // split() emits an empty first element when the text starts with a page header;
                // it is not a page, so it must not consume page number 1.
                continue;
            }
            pageNumber++;
            if (blank) {
                // An empty page keeps its number so later pages stay aligned, but yields no document.
                continue;
            }
            Map<String, Object> metadata = new HashMap<>();
            metadata.put("tenant", tenant);
            metadata.put("user", userId);
            metadata.put("absolute_directory_path", directoryPath);
            metadata.put("file_name", fileName);
            metadata.put("page", pageNumber);
            documents.add(Document.from(page, Metadata.from(metadata)));
        }
    } catch (Exception e) {
        // Best effort by design: report the failure (with its cause and stack trace) and return
        // what we have instead of propagating.
        log.error("Error reading file: " + e.getMessage(), e);
    }
    return documents;
}
/**
 * Reports whether this processor handles {@code file}.
 *
 * <p>The answer is {@code true} when the path's string form contains the marker
 * {@code -review-export-} anywhere, directory names included. {@code othersHaveProcessed} is not
 * consulted.
 *
 * @param file                the candidate file
 * @param othersHaveProcessed ignored by this implementation
 * @return {@code true} if the path text contains {@code -review-export-}
 */
@Override
public boolean canProcess(Path file, boolean othersHaveProcessed) {
    // Match on the String form: Path's own matching methods (e.g. Path.endsWith) compare whole
    // name elements rather than characters.
    String pathText = file.toString();
    return pathText.indexOf("-review-export-") >= 0;
}
}
Last updated
Was this helpful?
