74 lines
3.2 KiB
Python
74 lines
3.2 KiB
Python
import datetime
|
|
|
|
import duckdb
|
|
|
|
# Names of the monthly log tables to process; each one is looked up as
# ./parquet/<name>.parquet by read_tables().
TABLES = [
    'LOGS_2023_07',
    'LOGS_2023_08',
    'LOGS_2023_09',
    'LOGS_2023_10',
    'LOGS_2023_11',
    'LOGS_2023_12',
]
|
|
|
|
# SQL templates. Each one expects ``.format(table=<path to a parquet file>)``.

# Number of rows with a non-NULL id in the file.
sql_select_nb_lines = "SELECT COUNT(id) AS nb_lines FROM read_parquet('{table}');"

# Number of requests in the file (currently the same COUNT(id) as nb_lines,
# only the column alias differs).
sql_select_nb_requests = "SELECT COUNT(id) AS nb_requests FROM read_parquet('{table}');"

# Request count per OGC service, restricted to the geoserver/geonetwork
# applications and to the wms/wfs/wmts/csw services, sorted by service name.
sql_select_nb_requests_by_service = (
    "SELECT ogc_service AS service, COUNT(id) AS requests "
    "FROM read_parquet('{table}') "
    "WHERE dge_application IN ('geoserver', 'geonetwork') "
    "and ogc_service IN ('wms', 'wfs', 'wmts', 'csw') "
    "GROUP BY ogc_service ORDER BY ogc_service;"
)

# Sum of org_content_size over the whole file.
# The cast targets BIGINT (64-bit) rather than INTEGER (32-bit): with INTEGER a
# single value of 2**31 or more makes the cast, and thus the whole query, fail.
sql_select_content_size = (
    "SELECT SUM(CAST(org_content_size as BIGINT)) AS size "
    "FROM read_parquet('{table}');"
)

# Sum of org_content_size per OGC service, with the same application/service
# filter and ordering as sql_select_nb_requests_by_service.
sql_select_content_size_by_service = (
    "SELECT ogc_service AS service, SUM(CAST(org_content_size as BIGINT)) AS size "
    "FROM read_parquet('{table}') "
    "WHERE dge_application IN ('geoserver', 'geonetwork') "
    "and ogc_service IN ('wms', 'wfs', 'wmts', 'csw') "
    "GROUP BY ogc_service ORDER BY ogc_service;"
)
|
|
|
|
|
|
def read_tables(tables):
    """Collect request-count and content-size statistics per parquet log table.

    For every name in ``tables`` the file ``./parquet/<name>.parquet`` is
    queried through DuckDB using the module-level ``sql_select_*`` templates.

    Args:
        tables: iterable of table names (parquet file stems).

    Returns:
        dict mapping each table name to a dict of statistics. Keys are
        inserted in this order (print_result() relies on it for its columns):
        ``nb_lines``, ``nb_requests_all``, ``nb_requests_other``,
        ``nb_requests_services``, one ``nb_<service>`` per service row
        returned by the query, ``size_all``, ``size_other``,
        ``size_services`` and one ``size_<service>`` per service row.
        Sizes that SQL reports as NULL are stored as 0.
    """
    result = {}

    for table in tables:
        pq_file = './parquet/' + table + '.parquet'
        stats = {}
        result[table] = stats

        # --- request counts -------------------------------------------------
        stats['nb_lines'] = duckdb.sql(sql_select_nb_lines.format(table=pq_file)).fetchone()[0]
        stats['nb_requests_all'] = duckdb.sql(sql_select_nb_requests.format(table=pq_file)).fetchone()[0]
        nb_requests_by_service = duckdb.sql(sql_select_nb_requests_by_service.format(table=pq_file)).fetchall()

        # Placeholder so the key keeps its position; the real value is
        # computed once the per-service total is known.
        stats['nb_requests_other'] = 0
        stats['nb_requests_services'] = 0
        for service, count in nb_requests_by_service:
            stats['nb_' + service] = count
            stats['nb_requests_services'] += count

        # --- content sizes --------------------------------------------------
        # SUM() yields NULL (None) when there is no non-NULL value to add up,
        # e.g. an empty file; treat that as 0 so the arithmetic below cannot
        # fail with a TypeError.
        stats['size_all'] = duckdb.sql(sql_select_content_size.format(table=pq_file)).fetchone()[0] or 0
        size_by_service = duckdb.sql(sql_select_content_size_by_service.format(table=pq_file)).fetchall()

        stats['size_other'] = 0  # placeholder, see nb_requests_other
        stats['size_services'] = 0
        for service, size in size_by_service:
            size = size or 0  # a group whose sizes are all NULL sums to NULL
            stats['size_' + service] = size
            stats['size_services'] += size

        # --- everything that is not one of the listed services ---------------
        stats['nb_requests_other'] = stats['nb_requests_all'] - stats['nb_requests_services']
        stats['size_other'] = stats['size_all'] - stats['size_services']

    return result
|
|
|
|
|
|
def print_result(result):
    """Print the statistics as CSV, framed by two 80-dash ruler lines.

    The first CSV line is a header: ``TABLE`` followed by the upper-cased
    statistic names. Each following line is one table: its name followed by
    its values.

    The columns are the union of the keys of all tables, in order of first
    appearance, and every row is aligned to those columns. A table that lacks
    a key (e.g. a month without any request for one service) gets an empty
    field there, instead of shifting its remaining values under the wrong
    header.

    Args:
        result: mapping of table name -> mapping of statistic name -> value,
            as returned by read_tables().
    """
    print('-'*80)

    # dict keeps insertion order and update() never moves an existing key, so
    # this yields the de-duplicated keys in first-seen order.
    columns = {}
    for stats in result.values():
        columns.update(dict.fromkeys(stats))

    lines = []
    if result:  # no header at all when there is nothing to report
        lines.append(','.join(['TABLE'] + [str(key).upper() for key in columns]))
    for table, stats in result.items():
        values = [str(stats[key]) if key in stats else '' for key in columns]
        lines.append(','.join([str(table)] + values))

    print('\n'.join(lines))
    print('-'*80)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: compute the statistics for every table in TABLES,
    # print them, and report start time, end time and elapsed time.
    banner = '=' * 80
    clock_format = "%H:%M:%S"

    print(banner)
    started_at = datetime.datetime.now()
    print("Start:", started_at.strftime(clock_format))

    print_result(read_tables(tables=TABLES))

    finished_at = datetime.datetime.now()
    print("End:", finished_at.strftime(clock_format))

    # Keep only the part before the decimal point, i.e. drop the microseconds.
    elapsed = str(finished_at - started_at)
    print("Total duration:", elapsed.partition('.')[0])
    print(banner)
|