Skip to content

Commit 1212adb

Browse files
VED-1072: Ack filenames (part 2) (#1234)
* VED-1072 ack file extension fixes * os.path & update_json_ack_file * pt.2 / file_utils * updated batch file scenario with dat extension * ruff * duplicate function name * subTest --------- Co-authored-by: FimranNHS <fatima.imran3@nhs.net>
1 parent 44994bb commit 1212adb

7 files changed

Lines changed: 352 additions & 289 deletions

File tree

lambdas/ack_backend/src/update_ack_file.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
update_audit_table_item,
1717
)
1818
from common.clients import get_s3_client, logger
19+
from common.file_utils import get_file_key_without_ext
1920
from common.log_decorator import generate_and_send_logs
2021
from common.models.batch_constants import (
2122
ACK_BUCKET_NAME,
@@ -142,7 +143,7 @@ def complete_batch_file_process(
142143
start_time = time.time()
143144

144145
# finish CSV file
145-
file_key_without_ext = os.path.splitext(file_key)[0]
146+
file_key_without_ext = get_file_key_without_ext(file_key)
146147
ack_filename = f"{file_key_without_ext}_BusAck_{created_at_formatted_string}.csv"
147148

148149
move_file(ACK_BUCKET_NAME, f"{TEMP_ACK_DIR}/{ack_filename}", f"{COMPLETED_ACK_DIR}/{ack_filename}")
@@ -247,7 +248,7 @@ def obtain_current_json_ack_content(message_id: str, supplier: str, file_key: st
247248
logger.info("No existing JSON ack file found in S3 - creating new file")
248249

249250
ingestion_start_time = get_ingestion_start_time_by_message_id(message_id)
250-
raw_ack_filename = os.path.splitext(file_key)[0]
251+
raw_ack_filename = get_file_key_without_ext(file_key)
251252

252253
# Generate the initial fields
253254
return _make_ack_data_dict_identifier_information(
@@ -269,7 +270,7 @@ def update_csv_ack_file(
269270
ack_data_rows: list,
270271
) -> None:
271272
"""Updates the ack file with the new data row based on the given arguments"""
272-
file_key_without_ext = os.path.splitext(file_key)[0]
273+
file_key_without_ext = get_file_key_without_ext(file_key)
273274
ack_filename = f"{file_key_without_ext}_BusAck_{created_at_formatted_string}.csv"
274275
temp_ack_file_key = f"{TEMP_ACK_DIR}/{ack_filename}"
275276
accumulated_csv_content = obtain_current_csv_ack_content(temp_ack_file_key)
@@ -293,7 +294,7 @@ def update_json_ack_file(
293294
ack_data_rows: list,
294295
) -> None:
295296
"""Updates the ack file with the new data row based on the given arguments"""
296-
file_key_without_ext = os.path.splitext(file_key)[0]
297+
file_key_without_ext = get_file_key_without_ext(file_key)
297298
ack_filename = f"{file_key_without_ext}_BusAck_{created_at_formatted_string}.json"
298299
temp_ack_file_key = f"{TEMP_ACK_DIR}/{ack_filename}"
299300
ack_data_dict = obtain_current_json_ack_content(message_id, supplier, file_key, temp_ack_file_key)

lambdas/batch_processor_filter/src/batch_file_repository.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
"""Module for the batch file repository"""
22

3-
import os
43
from csv import writer
54
from io import BytesIO, StringIO
65

76
from batch_file_created_event import BatchFileCreatedEvent
87
from common.clients import get_s3_client
8+
from common.file_utils import get_file_key_without_ext
99
from common.models.batch_constants import ACK_BUCKET_NAME, SOURCE_BUCKET_NAME
1010

1111

@@ -49,7 +49,7 @@ def move_source_file_to_archive(self, file_key: str) -> None:
4949
def upload_failure_ack(self, batch_file_created_event: BatchFileCreatedEvent) -> None:
5050
ack_failure_data = self._create_ack_failure_data(batch_file_created_event)
5151

52-
filename_without_ext = os.path.splitext(batch_file_created_event["filename"])[0]
52+
filename_without_ext = get_file_key_without_ext(batch_file_created_event["filename"])
5353
ack_filename = f"ack/{filename_without_ext}_InfAck_{batch_file_created_event['created_at_formatted_string']}.csv"
5454

5555
# Create CSV file with | delimiter, filetype .csv

lambdas/shared/src/common/ack_file_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
"""Create ack file and upload to S3 bucket"""
22

3-
import os
43
from csv import writer
54
from io import BytesIO, StringIO
65

76
from common.clients import get_s3_client
7+
from common.file_utils import get_file_key_without_ext
88
from common.models.batch_constants import ACK_BUCKET_NAME
99

1010

@@ -36,7 +36,7 @@ def make_ack_data(
3636

3737
def upload_ack_file(file_key: str, ack_data: dict, created_at_formatted_string: str) -> None:
3838
"""Formats the ack data into a csv file and uploads it to the ack bucket"""
39-
file_key_without_ext = os.path.splitext(file_key)[0]
39+
file_key_without_ext = get_file_key_without_ext(file_key)
4040
ack_filename = f"ack/{file_key_without_ext}_InfAck_{created_at_formatted_string}.csv"
4141

4242
# Create CSV file with | delimiter, filetype .csv
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
"""General file utils"""
2+
3+
import os
4+
5+
6+
def get_file_key_without_ext(file_key: str) -> str:
    """Return *file_key* with its final extension removed.

    Uses os.path.splitext, so only the last extension is stripped
    ("a.tar.gz" -> "a.tar") and dots inside directory components are
    left untouched ("/dir.a/file.csv" -> "/dir.a/file").
    """
    return os.path.splitext(file_key)[0]
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import unittest
2+
3+
import common.file_utils
4+
5+
6+
class TestFileUtils(unittest.TestCase):
    """Unit tests for common.file_utils."""

    def test_get_file_key_without_ext(self):
        """Test get_file_key_without_ext returns the expected values"""

        test_cases = [
            ("file_key.csv", "file_key"),
            ("file_key.dat", "file_key"),
            ("file_key", "file_key"),
            ("/mnt/c/dir.a/file_key.csv", "/mnt/c/dir.a/file_key"),
        ]

        for file_key, expected in test_cases:
            # Pass the input to subTest so a failing case is identified
            # in the test output instead of an anonymous subtest.
            with self.subTest(file_key=file_key):
                self.assertEqual(common.file_utils.get_file_key_without_ext(file_key), expected)

tests/e2e_automation/features/batchTests/Steps/batch_common_steps.py

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,11 @@
2929
validate_json_bus_ack_file_failure_records,
3030
validate_json_bus_ack_file_structure_and_metadata,
3131
)
32-
from utilities.batch_S3_buckets import upload_file_to_S3, wait_and_read_ack_file, wait_for_file_to_move_archive
32+
from utilities.batch_S3_buckets import (
33+
upload_file_to_S3,
34+
wait_and_read_ack_file,
35+
wait_for_file_to_move_archive,
36+
)
3337
from utilities.enums import ActionFlag, ActionMap, Operation
3438

3539

@@ -86,6 +90,13 @@ def valid_batch_file_is_created_with_details(datatable, context):
8690
create_batch_file(context)
8791

8892

93+
@given("batch file is created for below data as full dataset with file extension dat")
94+
@ignore_if_local_run
95+
def valid_batch_file_is_created_with_details_and_dat_extension(datatable, context):
96+
build_dataFrame_using_datatable(datatable, context)
97+
create_batch_file(context, file_ext="dat")
98+
99+
89100
@when("same batch file is uploaded again in s3 bucket")
90101
@when("batch file is uploaded in s3 bucket")
91102
@ignore_local_run_set_test_data
@@ -140,7 +151,9 @@ def json_bus_ack_will_only_contain_file_metadata_and_no_record_entries(context):
140151

141152

142153
@then("Json bus ack will only contain file metadata and correct failure record entries")
143-
def json_bus_ack_will_only_contain_file_metadata_and_correct_failure_record_entries(context):
154+
def json_bus_ack_will_only_contain_file_metadata_and_correct_failure_record_entries(
155+
context,
156+
):
144157
json_content = context.fileContentJson
145158
assert json_content is not None, "BUS Ack JSON content is None"
146159
validate_json_bus_ack_file_structure_and_metadata(context)
@@ -225,7 +238,11 @@ def validate_imms_event_table_for_all_records_in_batch_file(context, operation:
225238
("Operation", Operation[operation].value, item.get("Operation")),
226239
("SupplierSystem", context.supplier_name, item.get("SupplierSystem")),
227240
("PatientPK", f"Patient#{nhs_number}", item.get("PatientPK")),
228-
("PatientSK", f"{context.vaccine_type.upper()}#{context.ImmsID}", item.get("PatientSK")),
241+
(
242+
"PatientSK",
243+
f"{context.vaccine_type.upper()}#{context.ImmsID}",
244+
item.get("PatientSK"),
245+
),
229246
("Version", int(context.expected_version), int(item.get("Version"))),
230247
]
231248

@@ -269,7 +286,13 @@ def build_dataFrame_using_datatable(datatable, context):
269286
headers = datatable[0]
270287
rows = datatable[1:]
271288

272-
table_list = [(row[headers.index("patient_id")], f"{row[headers.index('unique_id')]}-{timestamp}") for row in rows]
289+
table_list = [
290+
(
291+
row[headers.index("patient_id")],
292+
f"{row[headers.index('unique_id')]}-{timestamp}",
293+
)
294+
for row in rows
295+
]
273296
records = []
274297
for patient_id, unique_id in table_list:
275298
context.patient_id = patient_id
@@ -315,7 +338,8 @@ def validate_imms_delta_table_for_newly_created_records_in_batch_file(context):
315338
create_items = [i for i in delta_items if i.get("Operation") == "CREATE"]
316339

317340
check.is_true(
318-
len(create_items) == 1, f"Expected exactly 1 CREATE record for IMMS_ID {clean_id}, found {len(create_items)}"
341+
len(create_items) == 1,
342+
f"Expected exactly 1 CREATE record for IMMS_ID {clean_id}, found {len(create_items)}",
319343
)
320344

321345
create_item = create_items[0]
@@ -324,7 +348,11 @@ def validate_imms_delta_table_for_newly_created_records_in_batch_file(context):
324348
batch_record = {k: normalize(v) for k, v in row.to_dict().items()}
325349

326350
validate_imms_delta_record_with_batch_record(
327-
context, batch_record, create_item, Operation.created.value, ActionFlag.created.value
351+
context,
352+
batch_record,
353+
create_item,
354+
Operation.created.value,
355+
ActionFlag.created.value,
328356
)
329357

330358

@@ -341,7 +369,11 @@ def validate_imms_delta_table_for_updated_records_in_batch_file(context):
341369
item = update_items.pop(updated_index)
342370

343371
validate_imms_delta_record_with_batch_record(
344-
context, batch_record, item, Operation.updated.value, ActionFlag.updated.value
372+
context,
373+
batch_record,
374+
item,
375+
Operation.updated.value,
376+
ActionFlag.updated.value,
345377
)
346378

347379

@@ -365,5 +397,9 @@ def validate_imms_delta_table_for_deleted_records_in_batch_file(context):
365397
batch_record = {k: normalize(v) for k, v in row.to_dict().items()}
366398

367399
validate_imms_delta_record_with_batch_record(
368-
context, batch_record, delete_item, Operation.deleted.value, ActionFlag.deleted.value
400+
context,
401+
batch_record,
402+
delete_item,
403+
Operation.deleted.value,
404+
ActionFlag.deleted.value,
369405
)

0 commit comments

Comments
 (0)