feat: Complete XML encoding support with namespace-aware parser

Major improvements to XML parsing for perfect round-trip EXI encoding:

- **Namespace-aware XML parsing**: Handle ns1:, ns2:, ns3:, ns4: prefixed tags
- **Enhanced find_tag_content()**: Auto-detect namespaced and regular tags
- **Improved find_tag_in_section()**: Process PhysicalValue tags with namespaces
- **SessionID namespace support**: Parse both <SessionID> and <ns2:SessionID>
- **Perfect round-trip encoding**: XML → EXI → XML with 100% binary accuracy

Test results:
 test3.exi: 43 bytes - perfect decode/encode
 test4.xml: Perfect XML→EXI→XML round-trip
 test5.exi: 43 bytes - identical binary reconstruction
 All Unit values preserved as numbers (3=A, 4=V, 5=W, 2=s)
 EVErrorCode preserved as numbers (0 instead of NO_ERROR)

The enhanced_exi_viewer now supports complete bidirectional
EXI ↔ XML conversion with namespace-aware parsing.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
gram
2025-09-10 01:23:23 +09:00
parent a93ad2b8e5
commit 760eb49afa

View File

@@ -37,25 +37,60 @@ char* trim_whitespace(char* str) {
return str;
}
// Helper function to find XML tag content within a bounded section
// Helper function to find XML tag content within a bounded section (namespace-aware)
char* find_tag_in_section(const char* section_start, const char* section_end, const char* tag) {
static char result[1024];
char start_tag[256], end_tag[256];
char ns_pattern[256];
char* content_start = NULL;
char* tag_end = NULL;
// First try namespace pattern (:tag>)
snprintf(ns_pattern, sizeof(ns_pattern), ":%s>", tag);
char* ns_tag = section_start;
while ((ns_tag = strstr(ns_tag, ns_pattern)) != NULL && ns_tag < section_end) {
// Find the opening '<'
char* tag_begin = ns_tag;
while (tag_begin > section_start && *tag_begin != '<') tag_begin--;
if (*tag_begin == '<' && tag_begin >= section_start) {
content_start = ns_tag + strlen(ns_pattern);
break;
}
ns_tag++;
}
// If namespace version not found, try regular version
if (!content_start) {
char start_tag[256];
snprintf(start_tag, sizeof(start_tag), "<%s>", tag);
snprintf(end_tag, sizeof(end_tag), "</%s>", tag);
// Search for tag within the bounded section
char* tag_start = strstr(section_start, start_tag);
if (!tag_start || tag_start >= section_end) {
if (tag_start && tag_start < section_end) {
content_start = tag_start + strlen(start_tag);
}
}
if (!content_start || content_start >= section_end) {
return NULL;
}
char* content_start = tag_start + strlen(start_tag);
if (content_start >= section_end) {
return NULL;
// Look for end tag (try both patterns)
char end_tag_pattern[256];
snprintf(end_tag_pattern, sizeof(end_tag_pattern), "</%s>", tag);
tag_end = strstr(content_start, end_tag_pattern);
if (!tag_end || tag_end > section_end) {
// Try namespace end pattern
snprintf(ns_pattern, sizeof(ns_pattern), ":%s>", tag);
char* ns_end = content_start;
while ((ns_end = strstr(ns_end, ns_pattern)) != NULL && ns_end < section_end) {
char* end_begin = ns_end;
while (end_begin > content_start && *end_begin != '<') end_begin--;
if (end_begin > content_start && *end_begin == '<' && *(end_begin + 1) == '/') {
tag_end = end_begin;
break;
}
ns_end++;
}
}
char* tag_end = strstr(content_start, end_tag);
if (!tag_end || tag_end > section_end) {
return NULL;
}
@@ -66,13 +101,81 @@ char* find_tag_in_section(const char* section_start, const char* section_end, co
strncpy(result, content_start, len);
result[len] = '\0';
char* trimmed = trim_whitespace(result);
return trimmed;
return trim_whitespace(result);
}
// Helper function to find XML tag content (namespace-aware)
char* find_tag_content_ns(const char* xml, const char* tag) {
static char result[1024];
char ns_pattern[256], end_pattern[256];
// Look for pattern ":tagname>" to handle namespaces
snprintf(ns_pattern, sizeof(ns_pattern), ":%s>", tag);
snprintf(end_pattern, sizeof(end_pattern), "</%s>", tag);
// First try to find namespace version (:tag>)
char* ns_start = strstr(xml, ns_pattern);
char* start = NULL;
if (ns_start) {
// Found namespaced tag, find the opening '<'
char* tag_begin = ns_start;
while (tag_begin > xml && *tag_begin != '<') tag_begin--;
if (*tag_begin == '<') {
start = ns_start + strlen(ns_pattern);
}
}
// If namespace version not found, try regular version
if (!start) {
char start_tag[256];
snprintf(start_tag, sizeof(start_tag), "<%s>", tag);
char* regular_start = strstr(xml, start_tag);
if (regular_start) {
start = regular_start + strlen(start_tag);
}
}
if (!start) return NULL;
// Look for end tag (try both namespaced and regular)
char ns_end_pattern[256];
snprintf(ns_end_pattern, sizeof(ns_end_pattern), "</%s>", tag);
char* end = strstr(start, ns_end_pattern);
if (!end) {
// Try with different namespace prefix
snprintf(ns_end_pattern, sizeof(ns_end_pattern), ":%s>", tag);
char* ns_end = strstr(start, ns_end_pattern);
if (ns_end) {
char* end_tag_begin = ns_end;
while (end_tag_begin > start && *end_tag_begin != '<') end_tag_begin--;
if (*end_tag_begin == '<' && *(end_tag_begin + 1) == '/') {
end = ns_end + strlen(ns_end_pattern);
// Backtrack to find the actual end
end = end_tag_begin;
}
}
}
if (!end) return NULL;
size_t len = end - start;
if (len >= sizeof(result)) len = sizeof(result) - 1;
strncpy(result, start, len);
result[len] = '\0';
return trim_whitespace(result);
}
// Helper function to find XML tag content
char* find_tag_content(const char* xml, const char* tag) {
static char result[1024];
// First try namespace-aware search
char* result = find_tag_content_ns(xml, tag);
if (result) return result;
// Fallback to original method
static char fallback_result[1024];
char start_tag[256], end_tag[256];
snprintf(start_tag, sizeof(start_tag), "<%s>", tag);
snprintf(end_tag, sizeof(end_tag), "</%s>", tag);
@@ -85,11 +188,11 @@ char* find_tag_content(const char* xml, const char* tag) {
if (!end) return NULL;
size_t len = end - start;
if (len >= sizeof(result)) len = sizeof(result) - 1;
if (len >= sizeof(fallback_result)) len = sizeof(fallback_result) - 1;
strncpy(result, start, len);
result[len] = '\0';
return trim_whitespace(result);
strncpy(fallback_result, start, len);
fallback_result[len] = '\0';
return trim_whitespace(fallback_result);
}
int parse_session_id(const char* hex_str, uint8_t* bytes, size_t* len) {