Skip to content

Commit

Permalink
Merge pull request OpenSimulationInterface#777 from OpenSimulationInt…
Browse files Browse the repository at this point in the history
…erface/760-bug-in-osi-docu-on-tracefiles

Removal of the historical .txt trace file format and related scripts
  • Loading branch information
pmai authored Feb 26, 2024
2 parents 41f14a3 + f38c922 commit 77587cf
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 229 deletions.
22 changes: 0 additions & 22 deletions doc/architecture/formatting_scripts.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,6 @@ endif::[]
The OSI repository contains Python scripts for converting trace files from one format to another.
The formatting scripts are stored in `open-simulation-interface/format/`.

**txt2osi.py**

`txt2osi.py` converts plain-text trace files to binary `.osi` trace files.
This script takes the following parameters:

`--data`, `-d`::
String containing the path to the file with serialized data.

`--type`, `-t`::
Optional string describing the message type used to serialize data.
`'SensorView'`, `'GroundTruth'`, or `'SensorData'` are permitted values.
The default value is `'SensorView'`.

`--output`, `-o`::
Optional string containing the name of the output file.
The default value is `'converted.osi'`.

`--compress`, `-c`::
Optional Boolean controlling whether to compress the output to an lzma file.
`True` or `False` are permitted values.
The default value is `False`.

**osi2read.py**

`osi2read.py` converts trace files to human-readable `.txth` trace files.
Expand Down
11 changes: 6 additions & 5 deletions doc/architecture/trace_file_formats.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,20 @@ endif::[]
[#top-osi_trace_file_formats]
= OSI trace file formats

There are multiple formats for storing multiple serialized OSI messages in one trace file.
There are two formats for storing multiple serialized OSI messages in one trace file.

*.osi::
Binary trace file.
Messages are separated by a length specification before each message.
The length is represented by a four-byte, little-endian, unsigned integer.
The length does not include the integer itself.
*.txt::
Plain-text trace file.
Messages are separated by `$$__$$`.
*.txth::
Human-readable plain-text trace file.
Messages are separated by newlines.
NOTE: Previous releases of OSI also supported a so-called plain-text trace file format with the file extension `.txt`.
Despite its name, this legacy format did not contain plain text, but rather binary protobuf messages separated by a special separator (`$$__$$`).
Because that separator sequence can also occur inside binary message data, messages could not be delimited reliably, so the format was deprecated and fully replaced with the `.osi` binary file format.
This release no longer contains any support for the legacy `.txt` file format.
These files may be used for manual checks.
117 changes: 16 additions & 101 deletions format/OSITrace.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@

warnings.simplefilter("default")

SEPARATOR = b"$$__$$"
SEPARATOR_LENGTH = len(SEPARATOR)
BUFFER_SIZE = 1000000


Expand Down Expand Up @@ -47,7 +45,7 @@ def __init__(self, path=None, type_name="SensorView"):
self.retrieved_scenario_size = 0
self._int_length = len(struct.pack("<L", 0))

def from_file(self, path, type_name="SensorView", max_index=-1, format_type=None):
def from_file(self, path, type_name="SensorView", max_index=-1):
"""Import a scenario from a file"""

if path.lower().endswith((".lzma", ".xz")):
Expand All @@ -56,69 +54,7 @@ def from_file(self, path, type_name="SensorView", max_index=-1, format_type=None
self.scenario_file = open(path, "rb")

self.type_name = type_name
self.format_type = format_type

if self.format_type == "separated":
# warnings.warn("The separated trace files will be completely removed in the near future. Please convert them to *.osi files with the converter in the main OSI repository.", PendingDeprecationWarning)
self.timestep_count = self.retrieve_message_offsets(max_index)
else:
self.timestep_count = self.retrieve_message()

def retrieve_message_offsets(self, max_index):
"""
Retrieve the offsets of all the messages of the scenario and store them
in the `message_offsets` attribute of the object
It returns the number of discovered timesteps
"""
scenario_size = get_size_from_file_stream(self.scenario_file)

if max_index == -1:
max_index = float("inf")

buffer_deque = deque(maxlen=2)

self.message_offsets = [0]
eof = False

self.scenario_file.seek(0)

while not eof and len(self.message_offsets) <= max_index:
found = -1 # SEP offset in buffer
buffer_deque.clear()

while found == -1 and not eof:
new_read = self.scenario_file.read(BUFFER_SIZE)
buffer_deque.append(new_read)
buffer = b"".join(buffer_deque)
found = buffer.find(SEPARATOR)
eof = len(new_read) != BUFFER_SIZE

buffer_offset = self.scenario_file.tell() - len(buffer)
message_offset = found + buffer_offset + SEPARATOR_LENGTH
self.message_offsets.append(message_offset)

self.scenario_file.seek(message_offset)

while eof and found != -1:
buffer = buffer[found + SEPARATOR_LENGTH :]
found = buffer.find(SEPARATOR)

buffer_offset = scenario_size - len(buffer)

message_offset = found + buffer_offset + SEPARATOR_LENGTH

if message_offset >= scenario_size:
break
self.message_offsets.append(message_offset)

if eof:
self.retrieved_scenario_size = scenario_size
else:
self.retrieved_scenario_size = self.message_offsets[-1]
self.message_offsets.pop()

return len(self.message_offsets)
self.timestep_count = self.retrieve_message()

def retrieve_message(self):
scenario_size = get_size_from_file_stream(self.scenario_file)
Expand Down Expand Up @@ -180,42 +116,21 @@ def get_messages_in_index_range(self, begin, end):
for abs_message_offset in self.message_offsets[begin:end]
]

if self.format_type == "separated":
message_sequence_len = abs_last_offset - abs_first_offset - SEPARATOR_LENGTH
serialized_messages_extract = self.scenario_file.read(message_sequence_len)

for rel_index, rel_message_offset in enumerate(rel_message_offsets):
rel_begin = rel_message_offset
rel_end = (
rel_message_offsets[rel_index + 1] - SEPARATOR_LENGTH
if rel_index + 1 < len(rel_message_offsets)
else message_sequence_len
)
message = MESSAGES_TYPE[self.type_name]()
serialized_message = serialized_messages_extract[rel_begin:rel_end]
message.ParseFromString(serialized_message)
yield message

elif self.format_type is None:
message_sequence_len = abs_last_offset - abs_first_offset
serialized_messages_extract = self.scenario_file.read(message_sequence_len)

for rel_index, rel_message_offset in enumerate(rel_message_offsets):
rel_begin = rel_message_offset + self._int_length
rel_end = (
rel_message_offsets[rel_index + 1]
if rel_index + 1 < len(rel_message_offsets)
else message_sequence_len
)

message = MESSAGES_TYPE[self.type_name]()
serialized_message = serialized_messages_extract[rel_begin:rel_end]
message.ParseFromString(serialized_message)
yield message
message_sequence_len = abs_last_offset - abs_first_offset
serialized_messages_extract = self.scenario_file.read(message_sequence_len)

else:
self.scenario_file.close()
raise Exception(f"The defined format {self.format_type} does not exist.")
for rel_index, rel_message_offset in enumerate(rel_message_offsets):
rel_begin = rel_message_offset + self._int_length
rel_end = (
rel_message_offsets[rel_index + 1]
if rel_index + 1 < len(rel_message_offsets)
else message_sequence_len
)

message = MESSAGES_TYPE[self.type_name]()
serialized_message = serialized_messages_extract[rel_begin:rel_end]
message.ParseFromString(serialized_message)
yield message

def make_readable(self, name, interval=None, index=None):
self.scenario_file.seek(0)
Expand Down
16 changes: 3 additions & 13 deletions format/osi2read.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
"""
This program converts serialized txt/osi trace files into a human readable txth file.
This program converts serialized osi trace files into a human readable txth file.
Example usage:
python3 osi2read.py -d trace.osi -o myreadableosifile
python3 osi2read.py -d trace.txt -f separated -o myreadableosifile
"""

from OSITrace import OSITrace
Expand All @@ -19,7 +18,7 @@ def command_line_arguments():
dir_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

parser = argparse.ArgumentParser(
description="Convert a serialized osi/txt trace file to a readable txth output.",
description="Convert a serialized osi trace file to a readable txth output.",
prog="osi2read converter",
)
parser.add_argument(
Expand All @@ -42,15 +41,6 @@ def command_line_arguments():
type=str,
required=False,
)
parser.add_argument(
"--format",
"-f",
help="Set the format type of the trace.",
choices=["separated", None],
default=None,
type=str,
required=False,
)

return parser.parse_args()

Expand All @@ -61,7 +51,7 @@ def main():

# Initialize the OSI trace class
trace = OSITrace()
trace.from_file(path=args.data, type_name=args.type, format_type=args.format)
trace.from_file(path=args.data, type_name=args.type)

args.output = args.output.split(".", 1)[0] + ".txth"

Expand Down
88 changes: 0 additions & 88 deletions format/txt2osi.py

This file was deleted.

0 comments on commit 77587cf

Please sign in to comment.