001package com.astrolabsoftware.FinkBrowser.Parquet;
002
003import org.apache.hadoop.conf.Configuration;
004import org.apache.hadoop.fs.Path;
005import org.apache.parquet.hadoop.ParquetFileReader;
006import org.apache.parquet.hadoop.metadata.ParquetMetadata;
007import org.apache.parquet.format.converter.ParquetMetadataConverter;
008import org.apache.parquet.schema.MessageType;
009import org.apache.parquet.column.page.PageReadStore;
010import org.apache.parquet.io.MessageColumnIO;
011import org.apache.parquet.io.ColumnIOFactory;
012import org.apache.parquet.io.RecordReader;
013import org.apache.parquet.example.data.simple.convert.GroupRecordConverter;
014import org.apache.parquet.example.data.Group;
015import org.apache.parquet.example.data.simple.SimpleGroup;
016import org.apache.parquet.schema.GroupType;
017
018public class Test {
019
020  public static void main(String[] args) {
021    readParquetFile();
022    }
023  
024  private static void readParquetFile() {
025    Configuration conf = new Configuration();
026    Path path = new Path("/user/hrivnac/2021_05_01_part-00001-25a8dbcc-1a3c-428b-9eeb-087566a78bbd.c000.snappy.parquet");
027    try {
028      ParquetMetadata readFooter = ParquetFileReader.readFooter(conf, path, ParquetMetadataConverter.NO_FILTER);
029      MessageType schema = readFooter.getFileMetaData().getSchema();
030      ParquetFileReader r = new ParquetFileReader(conf, path, readFooter);
031      PageReadStore pages = null;
032      while (null != (pages = r.readNextRowGroup())) {
033        final long rows = pages.getRowCount();
034        System.out.println("Number of rows: " + rows);      
035        final MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema);
036        final RecordReader<Group> recordReader = columnIO.getRecordReader(pages, new GroupRecordConverter(schema));
037        String sTemp = "";
038        Group g;
039        SimpleGroup sg;
040        GroupType type;
041        int n;
042        while ((g = recordReader.read()) != null) {
043          if (g instanceof SimpleGroup) {
044            sg = (SimpleGroup)g;
045            type = sg.getType();
046            n = type.getFieldCount();
047            for (int i = 0; i < n; i++) {
048              System.out.println(type.getFieldName(i) + " " + g.getFieldRepetitionCount(i) + " " + type.getType(i));
049              //System.out.println(g.getString(i, 0));
050              }
051            }
052          else {
053            System.out.println(g.getClass());
054            }
055          }
056        }
057      }
058    catch (Exception e) {
059      e.printStackTrace();
060      }
061    }
062    
063  }