dge-traefik-logs/sqlite_to_parquet.py
Guillaume RYCKELYNCK 9b41e2350f first commit 20240311
2024-03-11 08:36:55 +01:00

42 lines
657 B
Python

import os
import duckdb
# Export every table of the SQLite log database to its own Parquet file in
# the current working directory, then print the row count of each file.

# Attach the SQLite database under the alias "logs" and make it the current
# database so that SHOW TABLES lists its tables.
duckdb.sql('''
ATTACH './db/dge_traefik_logs.db3' AS logs;
USE logs;
''')

# Build the listing once: print it for the operator, then reuse the same
# relation to extract the table names.
table_listing = duckdb.sql('SHOW TABLES;')
print(table_listing)
tables = table_listing.pl().get_column('name').to_list()

for table in tables:
    print(table)
    parquet_path = table + '.parquet'

    # Table names come straight from the database and may contain characters
    # that are not valid in a bare SQL identifier (hyphens, leading digits,
    # quotes, reserved words). Quote the identifier and the file-path literal,
    # doubling any embedded quote character, so the statements stay valid.
    quoted_table = '"' + table.replace('"', '""') + '"'
    quoted_path = "'" + parquet_path.replace("'", "''") + "'"

    # Only export tables whose Parquet file does not exist yet, so the script
    # can be re-run without redoing finished exports.
    if not os.path.isfile(parquet_path):
        duckdb.sql(
            f'''
            COPY (SELECT * FROM logs.{quoted_table})
            TO {quoted_path} (FORMAT 'parquet')
            '''
        )

    # Read the row count back from the Parquet file (fresh or pre-existing).
    nb = duckdb.sql(
        f'''
        FROM {quoted_path}
        SELECT COUNT(*)
        '''
    )
    print(nb)