// File: Unimarc/unimarc/unimarc/Helper/MarcParser.cs
// Language: C# (the original listing reports 324 lines, 12 KiB)

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace UniMarc
{
/// <summary>
/// One subfield of a MARC data field: a single-character code plus its text value.
/// </summary>
public class MarcSubfield
{
    /// <summary>Subfield code, i.e. the character written right after the subfield marker.</summary>
    public char Code { get; set; }

    /// <summary>Text carried by the subfield.</summary>
    public string Value { get; set; }

    /// <summary>Creates a subfield from its code and value.</summary>
    /// <param name="code">Subfield code character.</param>
    /// <param name="value">Subfield text.</param>
    public MarcSubfield(char code, string value)
    {
        this.Code = code;
        this.Value = value;
    }

    /// <summary>
    /// Renders the subfield in mnemonic form: the '▼' marker, then the code, then the value.
    /// </summary>
    public override string ToString() => string.Concat("▼", Code.ToString(), Value);
}
/// <summary>
/// One field of a MARC record: either a control field (a tag plus a single value)
/// or a data field (a tag, two indicator characters and a list of subfields).
/// </summary>
public class MarcField
{
    /// <summary>Field tag, e.g. "008" or "245".</summary>
    public string Tag { get; set; }

    /// <summary>
    /// Indicator characters of a data field. Defaults to two blanks so that a freshly
    /// created field already has the two-character shape that ParseMnemonic and
    /// ParseFullMarc produce (a one-character default changed after a
    /// ToString/ParseMnemonic round trip).
    /// </summary>
    public string Indicators { get; set; } = "  ";

    /// <summary>Value of a control field; not used when rendering data fields.</summary>
    public string ControlValue { get; set; }

    /// <summary>Subfields of a data field, in insertion order.</summary>
    public List<MarcSubfield> Subfields { get; set; } = new List<MarcSubfield>();

    /// <summary>True when <see cref="Tag"/> parses as an integer smaller than 10.</summary>
    public bool IsControlField => int.TryParse(Tag, out int tagNum) && tagNum < 10;

    /// <summary>Creates an empty field with the given tag.</summary>
    /// <param name="tag">Field tag.</param>
    public MarcField(string tag)
    {
        Tag = tag;
    }

    /// <summary>
    /// Returns the value of the first subfield whose code equals <paramref name="code"/>.
    /// </summary>
    /// <param name="code">Subfield code to look up.</param>
    /// <returns>
    /// The subfield value, or an empty string when no such subfield exists or its value is null.
    /// </returns>
    public string GetSubfieldValue(char code)
    {
        // "?? string.Empty" also covers a matching subfield whose Value is null,
        // so callers never receive null from this method.
        return Subfields.FirstOrDefault(s => s.Code == code)?.Value ?? string.Empty;
    }

    /// <summary>
    /// Renders the field as one mnemonic line: tag, tab, indicators, tab, content, then '▲'.
    /// Control fields print a blank in the indicator column followed by the control value.
    /// </summary>
    public override string ToString()
    {
        if (IsControlField)
        {
            return $"{Tag}\t \t{ControlValue}▲";
        }

        StringBuilder sb = new StringBuilder();
        sb.Append(Tag).Append('\t').Append(Indicators).Append('\t');
        foreach (var sub in Subfields)
        {
            sb.Append(sub.ToString());
        }
        sb.Append("▲");
        return sb.ToString();
    }
}
public class MarcParser
{
/// <summary>
/// Record leader. ParseFullMarc replaces it with the first 24 characters of the record
/// and reads the base address of data from positions 12-16, so the default must also
/// be 24 characters long: positions 08-09 are two blanks (the previous literal had only
/// one, which shifted every later position by one).
/// </summary>
public string Leader { get; set; } = "00000nam  2200000 k 4500";

/// <summary>
/// Fields of the current record. The parse methods clear and refill this list.
/// </summary>
public List<MarcField> Fields { get; set; } = new List<MarcField>();

// Character that introduces a subfield in mnemonic text.
private const char SUBFIELD_MARKER = '▼';

// Character that closes a field in mnemonic text.
private const char FIELD_TERMINATOR = '▲';

// Record terminator character (0x1D). No method of this class references the constant at present.
private const char RECORD_TERMINATOR = '\x1D';

/// <summary>Creates a parser with the default leader and no fields.</summary>
public MarcParser() { }
/// <summary>
/// Replaces <c>Fields</c> with the fields found in mnemonic text, one field per line.
/// The expected line shape is <c>TAG&lt;tab&gt;INDICATORS&lt;tab&gt;DATA▲</c>; lines with fewer
/// tabs are read positionally (tag, then up to two indicator characters, then data).
/// </summary>
/// <param name="data">Mnemonic text; null or empty just clears the record.</param>
public void ParseMnemonic(string data)
{
    Fields.Clear();
    if (string.IsNullOrEmpty(data)) return;

    string[] lines = data.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (var line in lines)
    {
        string cleanLine = line.Trim();
        if (cleanLine.Length < 3) continue; // too short to carry a tag

        MarcField field = new MarcField(cleanLine.Substring(0, 3));

        // Split into at most tag / indicators / data. Capping the count keeps any tab
        // that occurs inside the data instead of silently dropping what follows it.
        string[] parts = cleanLine.Split(new[] { '\t' }, 3);

        if (field.IsControlField)
        {
            // NOTE(review): trailing blanks are stripped together with the terminator, so a
            // fixed-width value can come back shorter than it was written.
            if (parts.Length >= 3)
                field.ControlValue = parts[2].TrimEnd(FIELD_TERMINATOR, ' ');
            else
                field.ControlValue = cleanLine.Substring(3).Trim('\t', ' ', FIELD_TERMINATOR);
        }
        else
        {
            string dataPart;
            if (parts.Length >= 3)
            {
                field.Indicators = parts[1].PadRight(2).Substring(0, 2);
                dataPart = parts[2];
            }
            else
            {
                // Indicators and data are packed together: either everything after the
                // single tab, or everything after the 3-character tag when there is no tab.
                string packed = parts.Length == 2 ? parts[1] : cleanLine.Substring(3);

                // Indicators are whatever precedes the data, capped at two characters and
                // never reaching into the first subfield delimiter.
                int markerAt = packed.IndexOfAny(new[] { SUBFIELD_MARKER, '\x1F' });
                int indicatorLength = Math.Min(2, markerAt >= 0 ? markerAt : packed.Length);
                if (indicatorLength > 0)
                    field.Indicators = packed.Substring(0, indicatorLength).PadRight(2);

                dataPart = packed.Substring(indicatorLength);
            }

            dataPart = dataPart.TrimEnd(FIELD_TERMINATOR);
            ParseSubfields(field, dataPart);
        }

        Fields.Add(field);
    }
}
/// <summary>
/// Splits the data portion of a field into subfields and appends them to
/// <paramref name="field"/>. Three input shapes are recognised, tried in this order:
/// text delimited by '▼', text delimited by 0x1F, and otherwise a heuristic that
/// starts a subfield at a letter found at the start of the text or after a blank or '^'.
/// </summary>
/// <param name="field">Field that receives the subfields.</param>
/// <param name="dataPart">Field content without tag and indicators; null or empty adds nothing.</param>
private void ParseSubfields(MarcField field, string dataPart)
{
    if (string.IsNullOrEmpty(dataPart)) return;

    bool usesDisplayMarker = dataPart.IndexOf(SUBFIELD_MARKER) >= 0;
    bool usesRawDelimiter = !usesDisplayMarker && dataPart.IndexOf('\x1F') >= 0;

    if (usesDisplayMarker || usesRawDelimiter)
    {
        char delimiter = usesDisplayMarker ? SUBFIELD_MARKER : '\x1F';
        string[] chunks = dataPart.Split(new[] { delimiter }, StringSplitOptions.RemoveEmptyEntries);
        foreach (string chunk in chunks)
        {
            // Empty chunks were removed by the split, so chunk[0] (the code) always exists.
            string text = chunk.Substring(1);
            // Only the '▼' form strips a trailing field terminator from each value.
            if (usesDisplayMarker) text = text.TrimEnd(FIELD_TERMINATOR);
            field.Subfields.Add(new MarcSubfield(chunk[0], text));
        }
        return;
    }

    // Heuristic form: no explicit delimiter anywhere in the text.
    int pos = 0;
    while (pos < dataPart.Length)
    {
        bool startsSubfield = char.IsLetter(dataPart[pos])
            && (pos == 0 || dataPart[pos - 1] == ' ' || dataPart[pos - 1] == '^' || dataPart[pos - 1] == '\x1F');
        if (!startsSubfield)
        {
            pos++;
            continue;
        }

        // The value runs up to the next blank that is directly followed by a letter.
        // The last character of the text is never considered as such a blank.
        int boundary = -1;
        for (int scan = pos + 1; scan + 1 < dataPart.Length; scan++)
        {
            if (dataPart[scan] == ' ' && char.IsLetter(dataPart[scan + 1]))
            {
                boundary = scan;
                break;
            }
        }

        if (boundary < 0)
        {
            // No further boundary: the rest of the text is this subfield's value.
            field.Subfields.Add(new MarcSubfield(dataPart[pos], dataPart.Substring(pos + 1).Trim()));
            return;
        }

        string value = dataPart.Substring(pos + 1, boundary - pos - 1);
        field.Subfields.Add(new MarcSubfield(dataPart[pos], value.Trim()));
        pos = boundary + 1; // resume at the letter that follows the blank
    }
}
/// <summary>
/// Replaces <c>Leader</c> and <c>Fields</c> from a complete record held in a string:
/// a 24-character leader, a directory of 12-character entries (3-character tag,
/// 4-digit length, 5-digit offset) and the field data starting at the base address
/// read from leader positions 12-16.
/// </summary>
/// <param name="data">
/// The record text. Null, empty or shorter than 24 characters clears the fields and returns;
/// a non-numeric base address returns after the leader has been replaced.
/// </param>
public void ParseFullMarc(string data)
{
    Fields.Clear();
    if (string.IsNullOrEmpty(data) || data.Length < 24) return;

    Leader = data.Substring(0, 24);
    if (!int.TryParse(Leader.Substring(12, 5), out int baseAddress)) return;

    // Detection: does the directory use plain offsets, or offsets scaled by two
    // (byte offsets of two-byte characters)? The heuristic looks at the length stored in
    // the first directory entry and treats a value above 75 as scaled.
    // NOTE(review): the original local was named len008, so the first entry is presumably
    // expected to be short (e.g. a control field) - confirm against real records.
    bool isScaled = false;
    if (data.Length >= 31)
    {
        if (int.TryParse(data.Substring(27, 4), out int firstEntryLength) && firstEntryLength > 75)
            isScaled = true;
    }

    int directoryLength = baseAddress - 24;
    int entryCount = directoryLength / 12; // zero or negative for a base address below 36: loop is skipped
    for (int i = 0; i < entryCount; i++)
    {
        int entryStart = 24 + (i * 12);
        if (entryStart + 12 > data.Length) break;

        // A terminator where a tag should start marks the end of the directory.
        if (data[entryStart] == '\x1E' || data[entryStart] == '^' || data[entryStart] == FIELD_TERMINATOR) break;

        string tag = data.Substring(entryStart, 3);
        if (!int.TryParse(data.Substring(entryStart + 3, 4), out int length)) continue;
        if (!int.TryParse(data.Substring(entryStart + 7, 5), out int offset)) continue;

        // int.TryParse accepts a leading sign, so a corrupt entry such as "-012" parses
        // successfully. Skip it here; otherwise Substring below would throw
        // ArgumentOutOfRangeException on a negative start or length.
        if (length < 0 || offset < 0) continue;

        // Scaling logic: directory values represent byte offsets of two-byte characters.
        // Integer division (offset / 2) maps the byte offset to the starting char index.
        // Adding 1 to the length before dividing rounds odd byte lengths up.
        int actualOffset = isScaled ? (offset / 2) : offset;
        int actualLength = isScaled ? ((length + 1) / 2) : length;

        int fieldStart = baseAddress + actualOffset;
        if (fieldStart >= data.Length) continue;

        // Clip a field that claims to run past the end of the record.
        if (fieldStart + actualLength > data.Length)
            actualLength = data.Length - fieldStart;

        string fieldData = data.Substring(fieldStart, actualLength);
        fieldData = fieldData.TrimEnd('\x1E', '\x1D', FIELD_TERMINATOR, '^', ' ');

        MarcField field = new MarcField(tag);
        if (field.IsControlField)
        {
            field.ControlValue = fieldData;
        }
        else if (fieldData.Length >= 2)
        {
            // First two characters are the indicators; the rest holds the subfields.
            field.Indicators = fieldData.Substring(0, 2);
            ParseSubfields(field, fieldData.Substring(2));
        }
        else if (fieldData.Length > 0)
        {
            ParseSubfields(field, fieldData);
        }

        Fields.Add(field);
    }
}
/// <summary>
/// Looks up fields by tag and returns them, their subfields or their text, depending on
/// <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">
/// <c>MarcField</c> returns the matching fields; <c>MarcSubfield</c> returns the subfields with
/// the requested code (a code is required); <c>string</c> returns control values, or subfield
/// values filtered by the code when one is given. Any other type yields an empty list.
/// </typeparam>
/// <param name="path">
/// A 3-character tag, optionally followed by a subfield code as the fourth character
/// (characters after the fourth are ignored).
/// </param>
/// <returns>
/// The matches in field order; an empty list when nothing matches or when
/// <paramref name="path"/> is null, empty or shorter than a tag.
/// </returns>
public List<T> GetTag<T>(string path)
{
    // A path shorter than three characters cannot name a tag. Return no matches instead of
    // letting Substring throw, which mirrors the guard SetTag applies to its path.
    if (string.IsNullOrEmpty(path) || path.Length < 3) return new List<T>();

    string tag = path.Substring(0, 3);
    char? subCode = path.Length > 3 ? (char?)path[3] : null;

    var fields = Fields.Where(f => f.Tag == tag).ToList();
    if (fields.Count == 0) return new List<T>();

    if (typeof(T) == typeof(MarcField))
        return fields.Cast<T>().ToList();

    if (typeof(T) == typeof(MarcSubfield))
    {
        if (!subCode.HasValue) return new List<T>();
        return fields
            .SelectMany(f => f.Subfields.Where(s => s.Code == subCode.Value))
            .Cast<T>()
            .ToList();
    }

    if (typeof(T) == typeof(string))
    {
        var stringResults = new List<string>();
        foreach (var f in fields)
        {
            if (f.IsControlField)
            {
                stringResults.Add(f.ControlValue);
            }
            else if (subCode.HasValue)
            {
                stringResults.AddRange(f.Subfields.Where(s => s.Code == subCode.Value).Select(s => s.Value));
            }
            else
            {
                // No code requested: every subfield value of the field is returned.
                stringResults.AddRange(f.Subfields.Select(s => s.Value));
            }
        }
        return stringResults.Cast<T>().ToList();
    }

    return new List<T>();
}
/// <summary>
/// String-returning shorthand for the generic GetTag lookup.
/// </summary>
/// <param name="path">Tag, optionally followed by a subfield code.</param>
/// <returns>The matching control or subfield values as strings.</returns>
public List<string> GetTag(string path) => GetTag<string>(path);
/// <summary>
/// Sets the value of a control field, or of one subfield of a data field, creating the
/// field (and the subfield) when it does not exist yet. Only the first field carrying the
/// tag is updated.
/// </summary>
/// <param name="path">
/// A 3-character tag; data fields additionally need the subfield code as fourth character.
/// A null, empty or shorter path is ignored.
/// </param>
/// <param name="value">Value to store.</param>
/// <param name="indicators">
/// Indicators for a newly created field, padded or cut to two characters.
/// Ignored when the field already exists.
/// </param>
/// <exception cref="ArgumentException">
/// The tag names a data field but <paramref name="path"/> carries no subfield code.
/// </exception>
public void SetTag(string path, string value, string indicators = " ")
{
    if (string.IsNullOrEmpty(path) || path.Length < 3) return;

    string tag = path.Substring(0, 3);
    bool isControl = int.TryParse(tag, out int tagNum) && tagNum < 10;

    // Validate before touching Fields: previously the exception was thrown only after an
    // empty field had been added and the list re-sorted, so a rejected call still
    // modified the record.
    if (!isControl && path.Length < 4)
        throw new ArgumentException("Subfield code required for data fields");

    var field = Fields.FirstOrDefault(f => f.Tag == tag);
    if (field == null)
    {
        // Same two-character normalisation ParseMnemonic applies to parsed indicators.
        string normalizedIndicators = (indicators ?? string.Empty).PadRight(2).Substring(0, 2);
        field = new MarcField(tag) { Indicators = normalizedIndicators };
        Fields.Add(field);
        // Keep the record ordered by tag. This assigns a new list instance to Fields.
        Fields = Fields.OrderBy(f => f.Tag).ToList();
    }

    if (isControl)
    {
        field.ControlValue = value;
        return;
    }

    char subCode = path[3];
    var sub = field.Subfields.FirstOrDefault(s => s.Code == subCode);
    if (sub != null)
        sub.Value = value;
    else
        field.Subfields.Add(new MarcSubfield(subCode, value));
}
/// <summary>
/// Returns a slice of the first 008 value.
/// </summary>
/// <param name="offset">Zero-based start position inside the 008 value.</param>
/// <param name="length">Number of characters to return.</param>
/// <returns>
/// The requested characters, or an empty string when there is no 008 value or it is
/// shorter than <paramref name="offset"/> + <paramref name="length"/>.
/// </returns>
public string Get008Segment(int offset, int length)
{
    string field008 = GetTag("008").FirstOrDefault();
    bool coversSegment = !string.IsNullOrEmpty(field008) && field008.Length >= offset + length;
    return coversSegment ? field008.Substring(offset, length) : string.Empty;
}
/// <summary>
/// Overwrites <paramref name="length"/> characters of the 008 value starting at
/// <paramref name="offset"/> and stores the result back through SetTag. A missing or
/// short 008 value is first padded with blanks to 40 characters.
/// </summary>
/// <param name="offset">Zero-based start position inside the 008 value.</param>
/// <param name="length">Width of the segment to overwrite.</param>
/// <param name="value">
/// Replacement text. Shorter text is blank-filled to <paramref name="length"/>, longer text
/// is cut; null is treated as empty, which blanks the segment.
/// </param>
public void Set008Segment(int offset, int length, string value)
{
    // Null used to fail with NullReferenceException on value.Length.
    string text = value ?? string.Empty;

    var valLine = GetTag("008").FirstOrDefault() ?? new string(' ', 40);
    StringBuilder sb = new StringBuilder(valLine.PadRight(40));

    for (int i = 0; i < length; i++)
    {
        int target = offset + i;
        // Positions outside the value are skipped on both sides: writes past the end were
        // already ignored, and a negative offset no longer throws from the indexer.
        if (target < 0) continue;
        if (target >= sb.Length) break;

        sb[target] = i < text.Length ? text[i] : ' ';
    }

    SetTag("008", sb.ToString());
}
/// <summary>
/// Renders the record as mnemonic text: each field's ToString() output followed by the
/// environment's line break, in list order.
/// </summary>
/// <returns>The concatenated field lines; an empty string when there are no fields.</returns>
public string ToMnemonicString()
{
    var renderedLines = Fields.Select(f => f.ToString() + Environment.NewLine);
    return string.Concat(renderedLines);
}
}
}