"""
A tool to print metadata of a SAS SAS7BDAT file
"""
# Copyright (c) 2022. Harvard University
#
# Developed by Research Software Engineering,
# Faculty of Arts and Sciences, Research Computing (FAS RC)
# Author: Michael A Bouzinier
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import sys
from nsaph.data_model.utils import DataReader
from sas7bdat import SAS7BDAT
from nsaph_utils.utils.io_utils import sizeof_fmt
def info(file_path: str):
    """
    Print metadata of a SAS7BDAT file and probe its rows.

    Prints every column name with its type, the header properties,
    the row count and the size computed as page count times page
    length. Then runs :func:`simulate` on the same file and finally
    iterates over the rows, printing a progress counter every 100,000
    rows and any rows found beyond the row count declared in the header
    (iteration stops once 21 such extra rows have been seen). The
    number of rows actually iterated is printed last.

    :param file_path: path to the SAS7BDAT file to inspect
    """
    # The context manager closes the underlying file handle even if
    # reading fails part way through.
    with SAS7BDAT(file_path, skip_header=True) as reader:
        # Column names may come back either as str or as bytes
        names = [
            column.name if isinstance(column.name, str)
            else column.name.decode("utf-8")
            for column in reader.columns
        ]
        for name, column in zip(names, reader.columns):
            print(name, column.type)

        header = reader.header.properties
        print(header)
        print("Row count: {:,}".format(header.row_count))
        size = header.page_count * header.page_length
        print("Size: {}".format(sizeof_fmt(size)))

        simulate(file_path)

        count = 0
        for row in reader:
            count += 1
            if (count % 100000) == 0:
                print(count)
            # Show rows that exist beyond what the header declares
            if count > header.row_count:
                print(row)
            # Do not keep reading forever if the header row count is wrong
            if count > header.row_count + 20:
                break
        print(count)
def simulate(file_path: str, page=100):
    """
    Read a file through ``DataReader`` in chunks of at most `page` rows.

    Repeatedly iterates ``reader.rows()``, abandoning each iteration
    once `page` rows have been consumed. When a chunk ends and the
    running total of rows is a multiple of 1,000,000, the last row of
    that chunk is printed. The loop ends when an iteration yields no
    rows at all.

    NOTE(review): presumably every call to ``reader.rows()`` resumes
    where the previous one stopped - confirm against ``DataReader``.

    :param file_path: path to the file to read
    :param page: maximum number of rows consumed per chunk
    """
    total = 0
    with DataReader(file_path) as reader:
        while True:
            in_chunk = 0
            for row in reader.rows():
                total += 1
                in_chunk += 1
                if in_chunk < page:
                    continue
                # Chunk is full: occasionally show a sample row, then
                # start the next chunk
                if total % 1000000 == 0:
                    print(row)
                break
            if in_chunk < 1:
                # Nothing was read, so the data is exhausted
                break
if __name__ == '__main__':
    # Exit with a usage message rather than an IndexError traceback
    # when the path argument is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: {} <path to SAS7BDAT file>".format(sys.argv[0]))
    info(sys.argv[1])