324 lines
12 KiB
C#
324 lines
12 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Text;
|
|
|
|
namespace UniMarc
|
|
{
|
|
/// <summary>
/// A single subfield of a MARC data field: a one-character code plus its text value.
/// </summary>
public class MarcSubfield
{
    /// <summary>Creates a subfield with the given code and value.</summary>
    /// <param name="code">Subfield code character.</param>
    /// <param name="value">Subfield text; stored as given.</param>
    public MarcSubfield(char code, string value)
    {
        this.Code = code;
        this.Value = value;
    }

    /// <summary>Subfield code character.</summary>
    public char Code { get; set; }

    /// <summary>Subfield text.</summary>
    public string Value { get; set; }

    /// <summary>
    /// Renders the subfield as the '▼' marker followed by the code and the value.
    /// </summary>
    public override string ToString() => "▼" + Code + Value;
}
|
|
|
|
/// <summary>
/// One field of a MARC record. A field whose tag parses as a number below 10 is a
/// control field and carries <see cref="ControlValue"/>; any other field carries
/// <see cref="Indicators"/> and <see cref="Subfields"/>.
/// </summary>
public class MarcField
{
    /// <summary>Creates a field with the given tag, blank indicators and no subfields.</summary>
    /// <param name="tag">Field tag, e.g. "245".</param>
    public MarcField(string tag)
    {
        Tag = tag;
    }

    /// <summary>Field tag.</summary>
    public string Tag { get; set; }

    /// <summary>
    /// Indicator characters of a data field. Defaults to two blanks so that a freshly
    /// created field has the same two-character shape the parsers produce
    /// (both parsers store exactly two characters here).
    /// </summary>
    public string Indicators { get; set; } = "  ";

    /// <summary>Value of a control field; not used by <see cref="ToString"/> for data fields.</summary>
    public string ControlValue { get; set; }

    /// <summary>Subfields of a data field, in record order.</summary>
    public List<MarcSubfield> Subfields { get; set; } = new List<MarcSubfield>();

    /// <summary>True when <see cref="Tag"/> parses as an integer smaller than 10.</summary>
    public bool IsControlField => int.TryParse(Tag, out int tagNum) && tagNum < 10;

    /// <summary>
    /// Returns the value of the first subfield with the given code.
    /// </summary>
    /// <param name="code">Subfield code to look for.</param>
    /// <returns>
    /// The subfield value, or an empty string when no such subfield exists or its
    /// value is null. Never returns null.
    /// </returns>
    public string GetSubfieldValue(char code)
    {
        return Subfields.FirstOrDefault(s => s.Code == code)?.Value ?? string.Empty;
    }

    /// <summary>
    /// Renders the field as one mnemonic line: tag, TAB, indicators, TAB, content,
    /// followed by the '▲' field terminator. Control fields emit a blank placeholder in
    /// the indicator column and <see cref="ControlValue"/> as content; data fields emit
    /// every subfield back to back.
    /// </summary>
    public override string ToString()
    {
        if (IsControlField)
        {
            return $"{Tag}\t \t{ControlValue}▲";
        }

        StringBuilder sb = new StringBuilder();
        sb.Append($"{Tag}\t{Indicators}\t");
        foreach (var sub in Subfields)
        {
            sb.Append(sub.ToString());
        }

        sb.Append("▲");
        return sb.ToString();
    }
}
|
|
|
|
public class MarcParser
|
|
{
|
|
/// <summary>
/// Record leader. ParseFullMarc replaces it with the first 24 characters of the input
/// and reads the base address of data from positions 12-16. The default is therefore a
/// 24-character template (two blanks after "nam") so that those positions hold "00000".
/// </summary>
public string Leader { get; set; } = "00000nam  2200000 k 4500";

/// <summary>Fields of the current record, in the order they were parsed or sorted.</summary>
public List<MarcField> Fields { get; set; } = new List<MarcField>();

// Display marker that introduces a subfield in mnemonic text.
private const char SUBFIELD_MARKER = '▼';

// Display marker that ends a field in mnemonic text.
private const char FIELD_TERMINATOR = '▲';

// Control character 0x1D; declared here but not referenced by the visible methods.
private const char RECORD_TERMINATOR = '\x1D';

/// <summary>Creates an empty parser with the default leader and no fields.</summary>
public MarcParser() { }
|
|
|
|
/// <summary>
/// Replaces <see cref="Fields"/> with the fields parsed from mnemonic text, one field
/// per line. Null or empty input leaves <see cref="Fields"/> empty.
/// </summary>
/// <remarks>
/// The regular line shape is: tag, TAB, indicators, TAB, content, optionally ended by
/// the '▲' field terminator. Two looser shapes are also accepted for data fields:
/// a line without tabs (tag, two indicator characters, content) and a line with a
/// single tab (tag, TAB, indicators immediately followed by content). Lines shorter
/// than three characters after trimming are skipped.
/// </remarks>
/// <param name="data">Mnemonic text; lines may be separated by CR, LF or both.</param>
public void ParseMnemonic(string data)
{
    Fields.Clear();
    if (string.IsNullOrEmpty(data))
    {
        return;
    }

    string[] lines = data.Split(new[] { '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (string line in lines)
    {
        string cleanLine = line.Trim();
        if (cleanLine.Length < 3)
        {
            continue;
        }

        MarcField field = new MarcField(cleanLine.Substring(0, 3));

        // Split into at most three columns so that a tab inside the content column
        // stays part of the content instead of truncating it.
        string[] parts = cleanLine.Split(new[] { '\t' }, 3);

        if (field.IsControlField)
        {
            if (parts.Length >= 3)
            {
                // Only the terminator is removed: blanks before it belong to the value
                // (fixed-length control data relies on character positions).
                field.ControlValue = parts[2].TrimEnd(FIELD_TERMINATOR);
            }
            else
            {
                field.ControlValue = cleanLine.Substring(3).Trim('\t', ' ', FIELD_TERMINATOR);
            }
        }
        else
        {
            string dataPart = string.Empty;

            if (parts.Length >= 3)
            {
                field.Indicators = parts[1].PadRight(2).Substring(0, 2);
                dataPart = parts[2];
            }
            else if (parts.Length == 2)
            {
                string rest = parts[1];
                bool startsWithSubfield = rest.Length > 0
                    && (rest[0] == SUBFIELD_MARKER || rest[0] == '\x1F');

                if (startsWithSubfield)
                {
                    // "24510<TAB>▼a..." : the column after the tab is pure content;
                    // indicators, if present, are glued to the tag.
                    if (parts[0].Length >= 5)
                    {
                        field.Indicators = parts[0].Substring(3, 2);
                    }

                    dataPart = rest;
                }
                else
                {
                    // "245<TAB>10▼a..." : indicators and content share one column.
                    field.Indicators = rest.PadRight(2).Substring(0, 2);
                    if (rest.Length > 2)
                    {
                        dataPart = rest.Substring(2);
                    }
                }
            }
            else if (cleanLine.Length >= 5)
            {
                // "24510▼a..." : no tabs at all; positions 3-4 are the indicators.
                string candidate = cleanLine.Substring(3, 2);
                if (candidate.IndexOf(SUBFIELD_MARKER) < 0 && candidate.IndexOf('\x1F') < 0)
                {
                    field.Indicators = candidate;
                }

                dataPart = cleanLine.Substring(5);
            }

            dataPart = dataPart.TrimEnd(FIELD_TERMINATOR);
            ParseSubfields(field, dataPart);
        }

        Fields.Add(field);
    }
}
|
|
|
|
/// <summary>
/// Splits the content of a data field into subfields and appends them to
/// <paramref name="field"/>. Null or empty content adds nothing.
/// </summary>
/// <remarks>
/// Three input shapes are tried in order: content delimited by the '▼' display marker,
/// content delimited by the 0x1F control character, and undelimited content, where a
/// letter at the start or after a blank, '^' or 0x1F is taken as a subfield code and
/// the value runs up to the next blank that is followed by a letter.
/// </remarks>
/// <param name="field">Field that receives the parsed subfields.</param>
/// <param name="dataPart">Field content without tag and indicators.</param>
private void ParseSubfields(MarcField field, string dataPart)
{
    if (string.IsNullOrEmpty(dataPart))
    {
        return;
    }

    bool hasDisplayMarker = dataPart.IndexOf(SUBFIELD_MARKER) >= 0;
    bool hasControlDelimiter = !hasDisplayMarker && dataPart.IndexOf('\x1F') >= 0;

    if (hasDisplayMarker || hasControlDelimiter)
    {
        char delimiter = hasDisplayMarker ? SUBFIELD_MARKER : '\x1F';
        string[] chunks = dataPart.Split(new[] { delimiter }, StringSplitOptions.RemoveEmptyEntries);
        foreach (string chunk in chunks)
        {
            // Empty chunks were removed by the split, so chunk[0] always exists.
            string text = chunk.Substring(1);
            if (hasDisplayMarker)
            {
                // Only the display-marker form strips a trailing field terminator.
                text = text.TrimEnd(FIELD_TERMINATOR);
            }

            field.Subfields.Add(new MarcSubfield(chunk[0], text));
        }

        return;
    }

    // Undelimited content: scan for "code letter" positions.
    int pos = 0;
    while (pos < dataPart.Length)
    {
        bool atBoundary = pos == 0
            || dataPart[pos - 1] == ' '
            || dataPart[pos - 1] == '^'
            || dataPart[pos - 1] == '\x1F';

        if (!char.IsLetter(dataPart[pos]) || !atBoundary)
        {
            pos++;
            continue;
        }

        char code = dataPart[pos];

        // The value ends at the first blank that is directly followed by a letter.
        int boundary = -1;
        for (int scan = pos + 1; scan < dataPart.Length - 1; scan++)
        {
            if (dataPart[scan] == ' ' && char.IsLetter(dataPart[scan + 1]))
            {
                boundary = scan;
                break;
            }
        }

        if (boundary == -1)
        {
            // No further code found: the rest of the content is this subfield's value.
            field.Subfields.Add(new MarcSubfield(code, dataPart.Substring(pos + 1).Trim()));
            break;
        }

        string val = dataPart.Substring(pos + 1, boundary - pos - 1);
        field.Subfields.Add(new MarcSubfield(code, val.Trim()));

        // Resume on the letter right after the separating blank.
        pos = boundary + 1;
    }
}
|
|
|
|
/// <summary>
/// Replaces <see cref="Leader"/> and <see cref="Fields"/> with the content of a record
/// in leader / directory / data layout. Input that is null, shorter than 24 characters,
/// or whose leader positions 12-16 are not numeric leaves <see cref="Fields"/> empty.
/// </summary>
/// <remarks>
/// Each 12-character directory entry holds a 3-character tag, a 4-digit field length and
/// a 5-digit offset relative to the base address. Entries whose length or offset is not
/// numeric, is negative, or points past the end of the input are skipped; a field that
/// runs past the end of the input is truncated.
/// </remarks>
/// <param name="data">The record as a string.</param>
public void ParseFullMarc(string data)
{
    Fields.Clear();
    if (string.IsNullOrEmpty(data) || data.Length < 24)
    {
        return;
    }

    Leader = data.Substring(0, 24);
    if (!int.TryParse(Leader.Substring(12, 5), out int baseAddress))
    {
        return;
    }

    // Detection: is the directory using standard byte offsets (ANSI) or scaled byte
    // offsets (Unicode/UTF-16)? The length of the first directory entry is inspected and
    // a value above 75 is taken to mean "scaled".
    // NOTE(review): this presumes the first entry describes a short fixed-length field;
    // confirm against the records this parser is fed.
    bool isScaled = data.Length >= 31
        && int.TryParse(data.Substring(27, 4), out int firstEntryLength)
        && firstEntryLength > 75;

    int directoryLength = baseAddress - 24;
    int entryCount = directoryLength / 12;

    for (int i = 0; i < entryCount; i++)
    {
        int entryStart = 24 + (i * 12);
        if (entryStart + 12 > data.Length)
        {
            break;
        }

        // A terminator where a tag should start means the directory has ended early.
        char entryHead = data[entryStart];
        if (entryHead == '\x1E' || entryHead == '^' || entryHead == FIELD_TERMINATOR)
        {
            break;
        }

        string tag = data.Substring(entryStart, 3);
        if (!int.TryParse(data.Substring(entryStart + 3, 4), out int length))
        {
            continue;
        }

        if (!int.TryParse(data.Substring(entryStart + 7, 5), out int offset))
        {
            continue;
        }

        // int.TryParse accepts a leading sign; a negative length or offset would make
        // Substring throw, so such entries are treated as corrupt and skipped.
        if (length < 0 || offset < 0)
        {
            continue;
        }

        // Scaling logic: directory values represent Unicode byte offsets (2x chars).
        // Integer division (offset / 2) maps the byte offset to the starting char index.
        // Adding 1 to the length before division handles odd byte lengths (markers).
        int actualOffset = isScaled ? (offset / 2) : offset;
        int actualLength = isScaled ? ((length + 1) / 2) : length;

        int fieldStart = baseAddress + actualOffset;
        if (fieldStart >= data.Length)
        {
            continue;
        }

        if (fieldStart + actualLength > data.Length)
        {
            actualLength = data.Length - fieldStart;
        }

        string rawField = data.Substring(fieldStart, actualLength);
        MarcField field = new MarcField(tag);

        if (field.IsControlField)
        {
            // Strip terminators only: trailing blanks of a control field are data
            // (fixed-length values are addressed by character position).
            field.ControlValue = rawField.TrimEnd('\x1E', '\x1D', FIELD_TERMINATOR, '^');
        }
        else
        {
            string fieldData = rawField.TrimEnd('\x1E', '\x1D', FIELD_TERMINATOR, '^', ' ');
            if (fieldData.Length >= 2)
            {
                field.Indicators = fieldData.Substring(0, 2);
                ParseSubfields(field, fieldData.Substring(2));
            }
            else if (fieldData.Length > 0)
            {
                ParseSubfields(field, fieldData);
            }
        }

        Fields.Add(field);
    }
}
|
|
|
|
/// <summary>
/// Looks up fields by tag and returns them, their subfields, or their values,
/// depending on <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">
/// <c>MarcField</c> returns the matching fields; <c>MarcSubfield</c> returns the
/// subfields with the requested code (a code is required); <c>string</c> returns the
/// control value of control fields and, for data fields, the values of the subfields
/// with the requested code or of all subfields when no code is given. Any other type
/// yields an empty list.
/// </typeparam>
/// <param name="path">
/// Three-character tag, optionally followed by a one-character subfield code,
/// e.g. "245" or "245a". Characters after the fourth are ignored.
/// </param>
/// <returns>
/// A new list; empty when <paramref name="path"/> is null, shorter than three
/// characters, or matches nothing.
/// </returns>
public List<T> GetTag<T>(string path)
{
    // A path shorter than a tag cannot match anything; returning early also avoids
    // the ArgumentOutOfRangeException that Substring(0, 3) would raise.
    if (string.IsNullOrEmpty(path) || path.Length < 3)
    {
        return new List<T>();
    }

    string tag = path.Substring(0, 3);
    char? subCode = path.Length > 3 ? (char?)path[3] : null;

    var fields = Fields.Where(f => f.Tag == tag).ToList();
    if (fields.Count == 0)
    {
        return new List<T>();
    }

    if (typeof(T) == typeof(MarcField))
    {
        return fields.Cast<T>().ToList();
    }

    if (typeof(T) == typeof(MarcSubfield))
    {
        if (!subCode.HasValue)
        {
            return new List<T>();
        }

        var subResults = new List<MarcSubfield>();
        foreach (var f in fields)
        {
            subResults.AddRange(f.Subfields.Where(s => s.Code == subCode.Value));
        }

        return subResults.Cast<T>().ToList();
    }

    if (typeof(T) == typeof(string))
    {
        var stringResults = new List<string>();
        foreach (var f in fields)
        {
            if (f.IsControlField)
            {
                stringResults.Add(f.ControlValue);
            }
            else if (subCode.HasValue)
            {
                stringResults.AddRange(f.Subfields.Where(s => s.Code == subCode.Value).Select(s => s.Value));
            }
            else
            {
                stringResults.AddRange(f.Subfields.Select(s => s.Value));
            }
        }

        return stringResults.Cast<T>().ToList();
    }

    return new List<T>();
}
|
|
|
|
/// <summary>
/// String-valued shortcut for the generic GetTag lookup: returns the values found
/// under <paramref name="path"/> (tag, optionally followed by a subfield code).
/// </summary>
/// <param name="path">Tag or tag plus subfield code, e.g. "008" or "245a".</param>
/// <returns>The list produced by the generic lookup with a string type argument.</returns>
public List<string> GetTag(string path) => GetTag<string>(path);
|
|
|
|
/// <summary>
/// Sets the value of a control field, or of one subfield of a data field, creating
/// the field and/or subfield when missing. Only the first field carrying the tag is
/// modified. A null, empty or shorter-than-three-character path is ignored.
/// </summary>
/// <param name="path">
/// Three-character tag for control fields (tag below 10), or tag plus one-character
/// subfield code for data fields, e.g. "245a".
/// </param>
/// <param name="value">Value to store.</param>
/// <param name="indicators">
/// Indicators for a newly created field; padded or cut to exactly two characters.
/// Ignored when the field already exists.
/// </param>
/// <exception cref="ArgumentException">
/// <paramref name="path"/> names a data field but has no subfield code. The record is
/// left unchanged in that case.
/// </exception>
public void SetTag(string path, string value, string indicators = " ")
{
    if (string.IsNullOrEmpty(path) || path.Length < 3)
    {
        return;
    }

    string tag = path.Substring(0, 3);
    bool isControl = int.TryParse(tag, out int tagNum) && tagNum < 10;

    // Validate before touching Fields so that a rejected call does not leave an
    // empty, half-created field behind.
    if (!isControl && path.Length < 4)
    {
        throw new ArgumentException("Subfield code required for data fields");
    }

    var field = Fields.FirstOrDefault(f => f.Tag == tag);
    if (field == null)
    {
        // Same two-character normalisation the mnemonic parser applies.
        string normalizedIndicators = (indicators ?? string.Empty).PadRight(2).Substring(0, 2);
        field = new MarcField(tag) { Indicators = normalizedIndicators };
        Fields.Add(field);

        // Keep the record in tag order; ordinal comparison makes the order independent
        // of the current culture, and OrderBy is stable for repeated tags.
        Fields = Fields.OrderBy(f => f.Tag, StringComparer.Ordinal).ToList();
    }

    if (isControl)
    {
        field.ControlValue = value;
        return;
    }

    char subCode = path[3];
    var sub = field.Subfields.FirstOrDefault(s => s.Code == subCode);
    if (sub != null)
    {
        sub.Value = value;
    }
    else
    {
        field.Subfields.Add(new MarcSubfield(subCode, value));
    }
}
|
|
|
|
/// <summary>
/// Returns <paramref name="length"/> characters of the first 008 value, starting at
/// <paramref name="offset"/>.
/// </summary>
/// <remarks>
/// A stored value shorter than 40 characters is right-padded with blanks to 40 before
/// slicing, mirroring what Set008Segment does, so that positions whose trailing blanks
/// were lost read back as blanks rather than as "missing".
/// </remarks>
/// <param name="offset">Zero-based start position.</param>
/// <param name="length">Number of characters to return.</param>
/// <returns>
/// The requested slice, or an empty string when there is no 008 value or the slice
/// would run past the end of the (padded) value.
/// </returns>
public string Get008Segment(int offset, int length)
{
    string valLine = GetTag("008").FirstOrDefault();
    if (string.IsNullOrEmpty(valLine))
    {
        return string.Empty;
    }

    if (valLine.Length < 40)
    {
        valLine = valLine.PadRight(40);
    }

    if (valLine.Length < offset + length)
    {
        return string.Empty;
    }

    return valLine.Substring(offset, length);
}
|
|
|
|
/// <summary>
/// Overwrites <paramref name="length"/> characters of the 008 value starting at
/// <paramref name="offset"/> and stores the result through SetTag("008", ...).
/// </summary>
/// <remarks>
/// The current value (or 40 blanks when there is none) is right-padded to at least 40
/// characters first. When <paramref name="value"/> is shorter than
/// <paramref name="length"/>, or null, the remaining positions are filled with blanks.
/// Positions outside the value, before the start or past the end, are skipped.
/// </remarks>
/// <param name="offset">Zero-based start position.</param>
/// <param name="length">Number of positions to overwrite.</param>
/// <param name="value">Replacement characters; null is treated as empty.</param>
public void Set008Segment(int offset, int length, string value)
{
    string replacement = value ?? string.Empty;

    string valLine = GetTag("008").FirstOrDefault() ?? new string(' ', 40);
    if (valLine.Length < 40)
    {
        valLine = valLine.PadRight(40);
    }

    StringBuilder sb = new StringBuilder(valLine);
    for (int i = 0; i < length; i++)
    {
        int target = offset + i;

        // Out-of-range positions are ignored on both sides instead of throwing.
        if (target < 0 || target >= sb.Length)
        {
            continue;
        }

        sb[target] = i < replacement.Length ? replacement[i] : ' ';
    }

    SetTag("008", sb.ToString());
}
|
|
|
|
/// <summary>
/// Serialises the record's fields to mnemonic text: the ToString() form of every
/// field, in list order, each followed by a line break. The leader is not included.
/// </summary>
/// <returns>The mnemonic text; an empty string when there are no fields.</returns>
public string ToMnemonicString()
{
    var output = new StringBuilder();
    Fields.ForEach(entry => output.AppendLine(entry.ToString()));
    return output.ToString();
}
|
|
}
|
|
}
|