View Javadoc
1   /*
2    * Copyright 2015-2024 Medical Information Systems Research Group (https://medical.zcu.cz),
3    * Department of Computer Science and Engineering, University of West Bohemia.
4    * Address: Univerzitni 8, 306 14 Plzen, Czech Republic.
5    *
6    * Author Petr Vcelak (vcelak@kiv.zcu.cz).
7    *
8    * This file is part of AnonMed project.
9    *
10   * AnonMed is free software: you can redistribute it and/or modify
11   * it under the terms of the GNU General Public License as published by
12   * the Free Software Foundation, either version 3 of the License.
13   *
14   * AnonMed is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17   * GNU General Public License for more details.
18   *
19   * You should have received a copy of the GNU General Public License
20   * along with AnonMed. If not, see <http://www.gnu.org/licenses/>.
21   */
22  package cz.zcu.mre.anonmed.anonymizer;
23  
24  import cz.zcu.mre.anonmed.AnonMedConfiguration;
25  import cz.zcu.mre.anonmed.AnonMedException;
26  import cz.zcu.mre.anonmed.AnonymousIdentification;
27  import cz.zcu.mre.anonmed.rule.Rule;
28  import cz.zcu.mre.mrelib.Const;
29  import cz.zcu.mre.mrelib.data.FileType;
30  import java.io.File;
31  import java.io.FileInputStream;
32  import java.io.FileNotFoundException;
33  import java.io.FileOutputStream;
34  import java.io.IOException;
35  import org.apache.poi.hssf.usermodel.HSSFWorkbook;
36  import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
37  import org.apache.poi.ss.usermodel.Cell;
38  import org.apache.poi.ss.usermodel.CellType;
39  import org.apache.poi.ss.usermodel.Sheet;
40  import org.apache.poi.ss.usermodel.Workbook;
41  import org.apache.poi.xssf.usermodel.XSSFWorkbook;
42  import org.slf4j.Logger;
43  import org.slf4j.LoggerFactory;
44  
45  /**
46   * The XML file de-identification class.
47   *
48   * @author Petr Vcelak (vcelak@kiv.zcu.cz)
49   * @since 2015-10-29
50   */
51  public class XLSAnonymizer
52          extends AbstractAnonymizer {
53  
54      /**
55       * Logger.
56       */
57      private static final Logger LOG = LoggerFactory.getLogger(XLSAnonymizer.class);
58  
59      /**
60       * Coordinate address size is 3 (sheet name, column and row).
61       */
62      public static final int COORDINATES_SIZE = 3;
63      /**
64       * Sheet is the first part of address.
65       */
66      public static final int SHEET = 0;
67  
68      /**
69       * Column is the second part of address.
70       */
71      public static final int COLUMN = 1;
72  
73      /**
74       * Row is the third part of address.
75       */
76      public static final int ROW = 2;
77  
78      /**
79       * Workbook instance.
80       */
81      private Workbook workbook;
82  
83      /**
84       * An anonymous identification.
85       */
86      private String anonymousIdentification;
87  
88      /**
89       * XML File de-identification constructor.
90       *
91       * @param config Configuration
92       */
93      public XLSAnonymizer(
94              final AnonMedConfiguration config) {
95  
96          super(config);
97  
98          setFileType(FileType.XLS);
99          setFileExtension(Const.XLS_FILE_EXTENSION);
100     }
101 
102     /**
103      * Open XML file for read.
104      *
105      * @param file File to open.
106      */
107     @Override
108     public final void fileOpen(final File file) {
109 
110         anonymousIdentification = null;
111 
112         FileInputStream fis = null;
113         try {
114             fis = new FileInputStream(file);
115 
116             // Using XSSF for xlsx format, for xls use HSSF
117             try {
118                 workbook = new HSSFWorkbook(fis);
119             } catch (OfficeXmlFileException office) {
120                 fis = new FileInputStream(file);
121                 workbook = new XSSFWorkbook(fis);
122             }
123 
124             // set active file when read was success
125             setActiveFile(file);
126 
127         } catch (FileNotFoundException ex) {
128             LOG.error("File {} not found. {}", file.getAbsolutePath(), ex.getMessage());
129         } catch (IOException ex) {
130             LOG.error("IOException {} {}", file.getAbsolutePath(), ex.getMessage());
131         } finally {
132             if (fis != null) {
133                 try {
134                     fis.close();
135                 } catch (IOException ex) {
136                     LOG.error(null, ex);
137                 }
138             }
139         }
140     }
141 
142     /**
143      * Save the XML file.
144      *
145      * @param filename file name
146      * @throws cz.zcu.mre.anonmed.AnonMedException exception.
147      */
148     @Override
149     public final void fileWrite(
150             final String filename)
151             throws AnonMedException {
152 
153         File file = new File(filename);
154 
155         if (!getConfig().isOverwriteFile() && file.exists()) {
156             throw new AnonMedException("Overwrite not allowed and "
157                     + "the output file '" + file.getAbsolutePath()
158                     + "' exists", -1);
159         }
160 
161         /* Write XLS file. */
162         FileOutputStream fos = null;
163         try {
164             fos = new FileOutputStream(file);
165             workbook.write(fos);
166 
167         } catch (FileNotFoundException ex) {
168             LOG.error("File {} not found. {}", file.getAbsolutePath(), ex.getMessage());
169         } catch (IOException ex) {
170             LOG.error("IOException {} {}", file.getAbsolutePath(), ex.getMessage());
171         } finally {
172             try {
173                 if (fos != null) {
174                     fos.close();
175                 }
176             } catch (IOException ex) {
177                 LOG.error(null, ex);
178             }
179         }
180 
181     }
182 
183     /**
184      * Rule append after the old value.
185      *
186      * @param rule the rule.
187      */
188     @Override
189     public final void ruleAppendAfter(final Rule rule) {
190 
191         Object[] id = rule2Coordinates(rule);
192         if (!canAnonymize(rule, id)) {
193             return;
194         }
195 
196         Sheet sheet = workbook.getSheet((String) id[SHEET]);
197         String oldValue = getCellStringValue(sheet.getRow(
198                 (Integer) id[ROW]).getCell((Integer) id[COLUMN]));
199         String newValue = oldValue + rule.getNewValue();
200 
201         if (rule.getCondition() != null
202                 && rule.getCondition().equals("toString")) {
203             sheet.getRow((Integer) id[ROW]).getCell(
204                     (Integer) id[COLUMN]).setCellType(CellType.STRING);
205         }
206 
207         report(rule, oldValue, newValue);
208         setCellStringValue(sheet.getRow((Integer) id[ROW]).getCell(
209                 (Integer) id[COLUMN]), newValue);
210     }
211 
212     /**
213      * Rule append before the old value.
214      *
215      * @param rule the rule.
216      */
217     @Override
218     public final void ruleAppendBefore(final Rule rule) {
219 
220         Object[] id = rule2Coordinates(rule);
221         if (!canAnonymize(rule, id)) {
222             return;
223         }
224 
225         Sheet sheet = workbook.getSheet((String) id[SHEET]);
226         String oldValue = getCellStringValue(sheet.getRow(
227                 (Integer) id[ROW]).getCell((Integer) id[COLUMN]));
228         String newValue = rule.getNewValue() + oldValue;
229 
230         if (rule.getCondition() != null
231                 && rule.getCondition().equals("toString")) {
232             sheet.getRow((Integer) id[ROW]).getCell(
233                     (Integer) id[COLUMN]).setCellType(CellType.STRING);
234         }
235 
236         report(rule, oldValue, newValue);
237         setCellStringValue(sheet.getRow((Integer) id[ROW]).getCell(
238                 (Integer) id[COLUMN]), newValue);
239     }
240 
241     /**
242      * Set object value to <code>null</code>.
243      *
244      * @param rule the rule.
245      */
246     @Override
247     public final void ruleEmpty(final Rule rule) {
248 
249         Object[] id = rule2Coordinates(rule);
250         if (!canAnonymize(rule, id)) {
251             return;
252         }
253 
254         Sheet sheet = workbook.getSheet((String) id[SHEET]);
255         String oldValue = getCellStringValue(sheet.getRow(
256                 (Integer) id[ROW]).getCell((Integer) id[COLUMN]));
257 
258         report(rule, oldValue, "[EMPTY]");
259         setCellStringValue(sheet.getRow((Integer) id[ROW]).getCell(
260                 (Integer) id[COLUMN]), "");
261     }
262 
263     /**
264      * Rule change value.
265      *
266      * @param rule the rule.
267      */
268     @Override
269     public final void ruleChange(final Rule rule) {
270 
271         Object[] id = rule2Coordinates(rule);
272         if (!canAnonymize(rule, id)) {
273             return;
274         }
275 
276         Sheet sheet = workbook.getSheet((String) id[SHEET]);
277         String oldValue = getCellStringValue(sheet.getRow(
278                 (Integer) id[ROW]).getCell((Integer) id[COLUMN]));
279 
280         report(rule, oldValue, rule.getNewValue());
281         setCellStringValue(sheet.getRow((Integer) id[ROW]).getCell(
282                 (Integer) id[COLUMN]), rule.getNewValue());
283     }
284 
285     /**
286      * Modify object old value with new one based on identification's
287      * substitution database.
288      *
289      * @param rule the rule.
290      */
291     @Override
292     public final void ruleIdentification(final Rule rule) {
293 
294         Object[] id = rule2Coordinates(rule);
295         if (!canAnonymize(rule, id)) {
296             return;
297         }
298 
299         Sheet sheet = workbook.getSheet((String) id[SHEET]);
300         String oldValue = getCellStringValue(sheet.getRow(
301                 (Integer) id[ROW]).getCell((Integer) id[COLUMN]));
302 
303         if (anonymousIdentification == null) {
304             anonymousIdentification
305                     = AnonymousIdentification.anonymize(oldValue);
306         }
307 
308         report(rule, oldValue, anonymousIdentification);
309         setCellStringValue(sheet.getRow((Integer) id[ROW]).getCell(
310                 (Integer) id[COLUMN]), anonymousIdentification);
311     }
312 
313     /**
314      * Rule keep.
315      *
316      * @param rule the rule.
317      */
318     @Override
319     public final void ruleKeep(final Rule rule) {
320         // TODO only for strict mode
321     }
322 
323     /**
324      * Remove object.
325      *
326      * @param rule the rule
327      */
328     @Override
329     public final void ruleRemove(final Rule rule) {
330 
331         Object[] id = rule2Coordinates(rule);
332         if (!canAnonymize(rule, id)) {
333             return;
334         }
335 
336         Sheet sheet = workbook.getSheet((String) id[SHEET]);
337         String oldValue = getCellStringValue(sheet.getRow(
338                 (Integer) id[ROW]).getCell((Integer) id[COLUMN]));
339 
340         report(rule, oldValue, "[REMOVED]");
341         setCellStringValue(sheet.getRow((Integer) id[ROW]).getCell(
342                 (Integer) id[COLUMN]), "");
343     }
344 
345     /**
346      * Rule specific is not supported -- throws UnsuportedOperationException.
347      *
348      * @param rule the rule.
349      */
350     @Override
351     public final void ruleSpecific(final Rule rule) {
352 
353         throw new UnsupportedOperationException("Not supported.");
354     }
355 
356     /**
357      * The Strict mode is not supported in the XML format.
358      */
359     @Override
360     public void strictMode() {
361         /* Strict mode is not supported in the XLSX format. */
362     }
363 
364     @Override
365     public void ruleDecrypt(Rule rule) {
366 
367         Object[] id = rule2Coordinates(rule);
368         if (!canAnonymize(rule, id)) {
369             return;
370         }
371 
372         Sheet sheet = workbook.getSheet((String) id[SHEET]);
373         String oldValue = getCellStringValue(sheet.getRow((Integer) id[ROW]).getCell((Integer) id[COLUMN]));
374         String newValue = makeDecrypt(rule, oldValue);
375 
376         if (rule.getCondition() != null
377                 && rule.getCondition().equals("toString")) {
378             sheet.getRow((Integer) id[ROW]).getCell(
379                     (Integer) id[COLUMN]).setCellType(CellType.STRING);
380         }
381 
382         report(rule, oldValue, newValue);
383         setCellStringValue(sheet.getRow((Integer) id[ROW]).getCell(
384                 (Integer) id[COLUMN]), newValue);
385     }
386 
387     @Override
388     public void ruleEncrypt(Rule rule) {
389 
390         Object[] id = rule2Coordinates(rule);
391         if (!canAnonymize(rule, id)) {
392             return;
393         }
394 
395         Sheet sheet = workbook.getSheet((String) id[SHEET]);
396         String oldValue = getCellStringValue(sheet.getRow((Integer) id[ROW]).getCell((Integer) id[COLUMN]));
397         String newValue = makeEncrypt(rule, oldValue);
398 
399         if (rule.getCondition() != null
400                 && rule.getCondition().equals("toString")) {
401             sheet.getRow((Integer) id[ROW]).getCell(
402                     (Integer) id[COLUMN]).setCellType(CellType.STRING);
403         }
404 
405         report(rule, oldValue, newValue);
406         setCellStringValue(sheet.getRow((Integer) id[ROW]).getCell(
407                 (Integer) id[COLUMN]), newValue);
408     }
409 
410     /**
411      * Convert the rule to coordinates.
412      *
413      * @param rule the rule
414      * @return coordinates as an Object[3] array with {sheet name, column id,
415      * row id}.
416      */
417     protected final Object[] rule2Coordinates(
418             final Rule rule) {
419 
420         Object[] id = new Object[COORDINATES_SIZE];
421         String[] s = rule.getRule().split(",");
422 
423         id[SHEET] = s[SHEET];
424         id[ROW] = Integer.parseInt(s[ROW]) - 1;
425         id[COLUMN] = Integer.parseInt(s[COLUMN]) - 1;
426 
427         return id;
428     }
429 
430     /**
431      * Check if can anonymize at coordiantes.
432      *
433      * @param rule the rule.
434      * @param id coordinates.
435      * @return true if it is possible to anonymize at coordinates.
436      */
437     protected final boolean canAnonymize(
438             final Rule rule,
439             final Object[] id) {
440 
441         return workbook.getSheet((String) id[SHEET]) != null;
442     }
443 
444     /**
445      * Get cell value as a String.
446      *
447      * @param cell Cell.
448      * @return String value.
449      */
450     protected final String getCellStringValue(
451             final Cell cell) {
452 
453         String value = null;
454 
455         switch (cell.getCellType()) {
456             case BLANK:
457                 break;
458 
459             case BOOLEAN:
460                 value = String.valueOf(cell.getBooleanCellValue());
461                 break;
462 
463             case ERROR:
464                 break;
465 
466             case FORMULA:
467                 break;
468 
469             case NUMERIC:
470                 value = String.valueOf(cell.getNumericCellValue());
471                 break;
472 
473             case STRING:
474                 value = String.valueOf(cell.getStringCellValue());
475                 break;
476 
477             default:
478                 break;
479 
480         }
481 
482         return value;
483     }
484 
485     /**
486      * Set string value in the cell with cell datatype detection and conversion.
487      *
488      * @param cell Cell.
489      * @param value Set the value.
490      */
491     protected final void setCellStringValue(
492             final Cell cell,
493             final Object value) {
494 
495         if (value instanceof String) {
496             cell.setCellType(CellType.STRING);
497             cell.setCellValue((String) value);
498 
499         } else if (value instanceof Integer) {
500             cell.setCellType(CellType.NUMERIC);
501             cell.setCellValue((Integer) value);
502 
503         } else if (value instanceof Float
504                 || value instanceof Double) {
505             cell.setCellType(CellType.NUMERIC);
506             cell.setCellValue((Double) value);
507 
508         } else if (value instanceof Boolean) {
509             cell.setCellType(CellType.BOOLEAN);
510             cell.setCellValue((Boolean) value);
511 
512         } else {
513             cell.setCellType(CellType.STRING);
514             cell.setCellValue(value.toString());
515 
516         }
517     }
518 
519     /**
520      * Replace value in a sed-like manner: 's#REGEX#REPLACEMENT#FLAGS'.
521      *
522      * @param rule the rule.
523      */
524     @Override
525     public void ruleSubstitute(Rule rule) {
526 
527         Object[] id = rule2Coordinates(rule);
528         if (!canAnonymize(rule, id)) {
529             return;
530         }
531 
532         Sheet sheet = workbook.getSheet((String) id[SHEET]);
533         String oldValue = getCellStringValue(sheet.getRow((Integer) id[ROW]).getCell((Integer) id[COLUMN]));
534         String newValue = makeSubstitute(rule, oldValue);
535 
536         if (rule.getCondition() != null
537                 && rule.getCondition().equals("toString")) {
538             sheet.getRow((Integer) id[ROW]).getCell(
539                     (Integer) id[COLUMN]).setCellType(CellType.STRING);
540         }
541 
542         report(rule, oldValue, newValue);
543         setCellStringValue(sheet.getRow((Integer) id[ROW]).getCell(
544                 (Integer) id[COLUMN]), newValue);
545     }
546 
547     /**
548      * Get and use makeSubstring with start and end positions in the value.
549      *
550      * @param rule the rule.
551      */
552     @Override
553     public void ruleSubstring(Rule rule) {
554 
555         Object[] id = rule2Coordinates(rule);
556         if (!canAnonymize(rule, id)) {
557             return;
558         }
559 
560         Sheet sheet = workbook.getSheet((String) id[SHEET]);
561         String oldValue = getCellStringValue(sheet.getRow((Integer) id[ROW]).getCell((Integer) id[COLUMN]));
562         String newValue = makeSubstring(rule, oldValue);
563 
564         if (rule.getCondition() != null
565                 && rule.getCondition().equals("toString")) {
566             sheet.getRow((Integer) id[ROW]).getCell(
567                     (Integer) id[COLUMN]).setCellType(CellType.STRING);
568         }
569 
570         report(rule, oldValue, newValue);
571         setCellStringValue(sheet.getRow((Integer) id[ROW]).getCell(
572                 (Integer) id[COLUMN]), newValue);
573     }
574 
575     /**
576      * Text to lowercase letters.
577      *
578      * @param rule THe rule.
579      */
580     @Override
581     public void ruleLowercase(Rule rule) {
582 
583         Object[] id = rule2Coordinates(rule);
584         if (!canAnonymize(rule, id)) {
585             return;
586         }
587 
588         Sheet sheet = workbook.getSheet((String) id[SHEET]);
589         String oldValue = getCellStringValue(sheet.getRow((Integer) id[ROW]).getCell((Integer) id[COLUMN]));
590         String newValue = makeLowercase(rule, oldValue);
591 
592         if (rule.getCondition() != null
593                 && rule.getCondition().equals("toString")) {
594             sheet.getRow((Integer) id[ROW]).getCell(
595                     (Integer) id[COLUMN]).setCellType(CellType.STRING);
596         }
597 
598         report(rule, oldValue, newValue);
599         setCellStringValue(sheet.getRow((Integer) id[ROW]).getCell(
600                 (Integer) id[COLUMN]), newValue);
601     }
602 
603     /**
604      * Text to uppercase letters.
605      *
606      * @param rule THe rule.
607      */
608     @Override
609     public void ruleUppercase(Rule rule) {
610 
611         Object[] id = rule2Coordinates(rule);
612         if (!canAnonymize(rule, id)) {
613             return;
614         }
615 
616         Sheet sheet = workbook.getSheet((String) id[SHEET]);
617         String oldValue = getCellStringValue(sheet.getRow((Integer) id[ROW]).getCell((Integer) id[COLUMN]));
618         String newValue = makeUppercase(rule, oldValue);
619 
620         if (rule.getCondition() != null
621                 && rule.getCondition().equals("toString")) {
622             sheet.getRow((Integer) id[ROW]).getCell(
623                     (Integer) id[COLUMN]).setCellType(CellType.STRING);
624         }
625 
626         report(rule, oldValue, newValue);
627         setCellStringValue(sheet.getRow((Integer) id[ROW]).getCell(
628                 (Integer) id[COLUMN]), newValue);
629     }
630 }