You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

928 lines
26 KiB
C

/***************************************************************************
* Generic routines to manage selection lists.
*
* This file is part of the miniSEED Library.
*
* Copyright (c) 2023 Chad Trabant, EarthScope Data Services
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "libmseed.h"
static int ms_isinteger (const char *string);
static int ms_globmatch (const char *string, const char *pattern);
/**********************************************************************/ /**
* @brief Test the specified parameters for a matching selection entry
*
* Search the ::MS3Selections for an entry matching the provided
* parameters. The ::MS3Selections.sidpattern may contain globbing
* characters. The ::MS3Selections.timewindows may contain start and
* end times set to ::NSTUNSET to denote "open" times.
*
* Positive matching requires:
* @parblock
* -# glob match of sid against sidpattern in selection
* -# time window intersection with range in selection
* -# equal pubversion if selection pubversion > 0
* @endparblock
*
* @param[in] selections ::MS3Selections to search
* @param[in] sid Source ID to match
* @param[in] starttime Start time to match
* @param[in] endtime End time to match
* @param[in] pubversion Publication version to match
* @param[out] ppselecttime Pointer-to-pointer to return the matching ::MS3SelectTime entry
*
* @returns A pointer to the matching ::MS3Selections entry on a
* successful match and NULL for no match or error.
***************************************************************************/
const MS3Selections *
ms3_matchselect (const MS3Selections *selections, const char *sid, nstime_t starttime,
                 nstime_t endtime, int pubversion, const MS3SelectTime **ppselecttime)
{
  const MS3Selections *entry;
  const MS3SelectTime *window;

  /* Whether the caller supplied a usable (neither error nor unset) bound */
  const int havestart = (starttime != NSTERROR && starttime != NSTUNSET);
  const int haveend   = (endtime != NSTERROR && endtime != NSTUNSET);

  for (entry = selections; entry; entry = entry->next)
  {
    /* The source ID must match the entry's glob pattern */
    if (!ms_globmatch (sid, entry->sidpattern))
      continue;

    /* A positive publication version in the entry must match exactly */
    if (entry->pubversion > 0 && entry->pubversion != pubversion)
      continue;

    /* An entry without time windows matches at any time */
    if (!entry->timewindows)
    {
      if (ppselecttime)
        *ppselecttime = NULL;

      return entry;
    }

    /* Otherwise the first window that is not ruled out is the match */
    for (window = entry->timewindows; window; window = window->next)
    {
      const int winstartset = (window->starttime != NSTERROR && window->starttime != NSTUNSET);
      const int winendset   = (window->endtime != NSTERROR && window->endtime != NSTUNSET);

      /* Ruled out: both requested times fall before the window start */
      if (havestart && winstartset &&
          starttime < window->starttime && endtime < window->starttime)
        continue;

      /* Ruled out: both requested times fall after the window end */
      if (haveend && winendset &&
          endtime > window->endtime && starttime > window->endtime)
        continue;

      if (ppselecttime)
        *ppselecttime = window;

      return entry;
    }
  }

  /* Nothing matched */
  if (ppselecttime)
    *ppselecttime = NULL;

  return NULL;
} /* End of ms3_matchselect() */
/**********************************************************************/ /**
* @brief Test the ::MS3Record for a matching selection entry
*
* Search the ::MS3Selections for an entry matching the provided
* parameters.
*
* Positive matching requires:
* @parblock
* -# glob match of sid against sidpattern in selection
* -# time window intersection with range in selection
* -# equal pubversion if selection pubversion > 0
* @endparblock
*
* @param[in] selections ::MS3Selections to search
* @param[in] msr ::MS3Record to match against selections
* @param[out] ppselecttime Pointer-to-pointer to return the matching ::MS3SelectTime entry
*
* @returns A pointer to the matching ::MS3Selections entry on a
* successful match and NULL for no match or error.
***************************************************************************/
const MS3Selections *
msr3_matchselect (const MS3Selections *selections, const MS3Record *msr,
                  const MS3SelectTime **ppselecttime)
{
  /* Both a selection list and a record are needed to attempt a match */
  if (selections == NULL || msr == NULL)
    return NULL;

  /* Delegate using the record's source ID, start time, computed end time
   * and publication version */
  return ms3_matchselect (selections, msr->sid, msr->starttime, msr3_endtime (msr),
                          msr->pubversion, ppselecttime);
} /* End of msr3_matchselect() */
/**********************************************************************/ /**
* @brief Add selection parameters to selection list.
*
* The \a sidpattern may contain globbing characters.
*
* The \a starttime and \a endtime may be set to ::NSTUNSET to denote
* "open" times.
*
* The \a pubversion may be set to 0 to match any publication
* version.
*
* @param[in] ppselections ::MS3Selections to add new selection to
* @param[in] sidpattern Source ID pattern, may contain globbing characters
* @param[in] starttime Start time for selection, ::NSTUNSET for open
* @param[in] endtime End time for selection, ::NSTUNSET for open
* @param[in] pubversion Publication version for selection, 0 for any
*
* @returns 0 on success and -1 on error.
*
* \ref MessageOnError - this function logs a message on error
***************************************************************************/
int
ms3_addselect (MS3Selections **ppselections, const char *sidpattern,
               nstime_t starttime, nstime_t endtime, uint8_t pubversion)
{
  MS3Selections *entry = NULL;
  MS3SelectTime *newst = NULL;

  if (!ppselections || !sidpattern)
  {
    ms_log (2, "Required argument not defined: 'ppselections' or 'sidpattern'\n");
    return -1;
  }

  /* Allocate the new time window and populate it */
  if (!(newst = (MS3SelectTime *)libmseed_memory.malloc (sizeof (MS3SelectTime))))
  {
    ms_log (2, "Cannot allocate memory\n");
    return -1;
  }

  memset (newst, 0, sizeof (MS3SelectTime));
  newst->starttime = starttime;
  newst->endtime   = endtime;

  /* Search for an existing entry with the same pattern and publication
   * version; the loop leaves entry NULL when none exists (or list is empty) */
  for (entry = *ppselections; entry; entry = entry->next)
  {
    if (!strcmp (entry->sidpattern, sidpattern) && entry->pubversion == pubversion)
      break;
  }

  if (!entry)
  {
    /* No matching entry: allocate a new one */
    if (!(entry = (MS3Selections *)libmseed_memory.malloc (sizeof (MS3Selections))))
    {
      ms_log (2, "Cannot allocate memory\n");

      /* Release the time window so that it is not leaked on this error path */
      libmseed_memory.free (newst);
      return -1;
    }

    memset (entry, 0, sizeof (MS3Selections));

    /* The pattern is truncated if it does not fit the entry's buffer */
    strncpy (entry->sidpattern, sidpattern, sizeof (entry->sidpattern));
    entry->sidpattern[sizeof (entry->sidpattern) - 1] = '\0';
    entry->pubversion = pubversion;

    /* Add the new entry to the beginning of the selection list */
    entry->next   = *ppselections;
    *ppselections = entry;
  }

  /* Add the time window to the beginning of the entry's window list */
  newst->next        = entry->timewindows;
  entry->timewindows = newst;

  return 0;
} /* End of ms3_addselect() */
/**********************************************************************/ /**
* @brief Add selection parameters to a selection list based on
* separate source name codes
*
* The \a network, \a station, \a location, and \a channel arguments may
* contain globbing parameters.
* The \a starttime and \a endtime may be set to ::NSTUNSET to denote
* "open" times.
*
* The \a pubversion may be set to 0 to match any publication
* version.
*
* If any of the naming parameters are not supplied (pointer is NULL)
* a wildcard for all matches is substituted.
*
* As a special case, if the location code (loc) is set to \c "--" to
* match an empty location code it will be translated to an empty string
* to match libmseed's notation.
*
* @param[in] ppselections ::MS3Selections to add new selection to
* @param[in] network Network code, may contain globbing characters
* @param[in] station Station code, may contain globbing characters
* @param[in] location Location code, may contain globbing characters
* @param[in] channel channel code, may contain globbing characters
* @param[in] starttime Start time for selection, ::NSTUNSET for open
* @param[in] endtime End time for selection, ::NSTUNSET for open
* @param[in] pubversion Publication version for selection, 0 for any
*
* @return 0 on success and -1 on error.
*
* \ref MessageOnError - this function logs a message on error
***************************************************************************/
int
ms3_addselect_comp (MS3Selections **ppselections, char *network, char *station,
                    char *location, char *channel, nstime_t starttime,
                    nstime_t endtime, uint8_t pubversion)
{
  char sidpattern[100];
  char selnet[20];
  char selsta[20];
  char selloc[20];
  char selchan[20];
  const char *source;

  if (!ppselections)
  {
    ms_log (2, "Required argument not defined: 'ppselections'\n");
    return -1;
  }

  /* Network code, a missing value becomes the match-all wildcard */
  source = (network) ? network : "*";
  strncpy (selnet, source, sizeof (selnet));
  selnet[sizeof (selnet) - 1] = '\0';

  /* Station code, a missing value becomes the match-all wildcard */
  source = (station) ? station : "*";
  strncpy (selsta, source, sizeof (selsta));
  selsta[sizeof (selsta) - 1] = '\0';

  /* Location code, with the special case "--" translated to an empty string */
  source = (location) ? location : "*";
  if (location && !strcmp (location, "--"))
    source = "";
  strncpy (selloc, source, sizeof (selloc));
  selloc[sizeof (selloc) - 1] = '\0';

  /* Channel code: a 3-character SEED 2.x style code is converted to an
   * extended code, anything else is copied as-is */
  if (channel && ms_globmatch (channel, "[?*a-zA-Z0-9][?*a-zA-Z0-9][?*a-zA-Z0-9]"))
  {
    ms_seedchan2xchan (selchan, channel);
  }
  else
  {
    source = (channel) ? channel : "*";
    strncpy (selchan, source, sizeof (selchan));
    selchan[sizeof (selchan) - 1] = '\0';
  }

  /* Build the source identifier glob pattern from the components */
  if (ms_nslc2sid (sidpattern, sizeof (sidpattern), 0,
                   selnet, selsta, selloc, selchan) < 0)
    return -1;

  /* Add the selection to the list */
  return (ms3_addselect (ppselections, sidpattern, starttime, endtime, pubversion)) ? -1 : 0;
} /* End of ms3_addselect_comp() */
#define MAX_SELECTION_FIELDS 8
/**********************************************************************/ /**
* @brief Read data selections from a file
*
* Selections from a file are added to the specified selections list.
* On errors this routine will leave allocated memory unreachable
* (leaked), it is expected that this is a program failing condition.
*
* As a special case if the filename is "-", selection lines will be
* read from stdin.
*
* Each line of the file contains a single selection and may be one of
* these two line formats:
* @code
* SourceID [Starttime [Endtime [Pubversion]]]
* @endcode
* or
* @code
* Network Station Location Channel [Pubversion [Starttime [Endtime]]]
* @endcode
*
* The \c Starttime and \c Endtime values must be in a form recognized
* by ms_timestr2nstime() and include a full date (i.e. just a year is
* not allowed).
*
* In the latter version, if the "Channel" field is a SEED 2.x channel
* (3-characters) it will automatically be converted into extended
* channel form (band_source_subsource).
*
* In the latter version, the "Pubversion" field, which was "Quality"
* in earlier versions of the library, is assumed to be a publication
* version if it is an integer, otherwise it is ignored.
*
* @returns Count of selections added on success and -1 on error.
*
* \ref MessageOnError - this function logs a message on error
***************************************************************************/
int
ms3_readselectionsfile (MS3Selections **ppselections, const char *filename)
{
  FILE *fp;
  nstime_t starttime;
  nstime_t endtime;
  uint8_t pubversion;
  char selectline[200];
  char *line;
  char *fields[MAX_SELECTION_FIELDS];
  char *cp;
  size_t linelen;
  int infield;
  int selectcount = 0;
  int linecount   = 0;
  int fieldidx;
  int retval = -1;
  uint8_t isstart2;
  uint8_t isend3;
  uint8_t isstart6;
  uint8_t isend7;

  if (!ppselections || !filename)
  {
    ms_log (2, "Required argument not defined: 'ppselections' or 'filename'\n");
    return -1;
  }

  if (strcmp (filename, "-"))
  {
    if (!(fp = fopen (filename, "rb")))
    {
      ms_log (2, "Cannot open file %s: %s\n", filename, strerror (errno));
      return -1;
    }
  }
  else
  {
    /* Use stdin as special case */
    fp = stdin;
  }

  /* fgets() always terminates the buffer; input lines longer than the
   * buffer are consumed in multiple pieces */
  while (fgets (selectline, sizeof (selectline), fp))
  {
    linecount++;

    /* Reset fields array */
    for (fieldidx = 0; fieldidx < MAX_SELECTION_FIELDS; fieldidx++)
      fields[fieldidx] = NULL;

    line = selectline;

    /* Trim trailing whitespace (including newlines, carriage returns, etc.).
     * Indexing by length avoids forming a pointer before the buffer.
     * Characters are cast to unsigned char as required by <ctype.h>. */
    linelen = strlen (line);
    while (linelen > 0 && isspace ((unsigned char)line[linelen - 1]))
      line[--linelen] = '\0';

    /* Trim leading whitespace if any */
    while (isspace ((unsigned char)*line))
      line++;

    /* Skip empty lines and comment lines */
    if (*line == '\0' || *line == '#')
      continue;

    /* Set fields array to whitespace delimited fields.
     * infield: 0 = in whitespace, 1 = in a field */
    fieldidx = 0;
    infield  = 0;
    for (cp = line; *cp && fieldidx < MAX_SELECTION_FIELDS; cp++)
    {
      if (!isspace ((unsigned char)*cp))
      {
        /* Field starts at transition from whitespace to non-whitespace */
        if (!infield)
          fields[fieldidx++] = cp;

        infield = 1;
      }
      else
      {
        /* Field ends at transition from non-whitespace to whitespace */
        if (infield)
          *cp = '\0';

        infield = 0;
      }
    }

/* Globbing pattern to match the beginning of a date "YYYY[-/,]#..." */
#define INITDATEGLOB "[0-9][0-9][0-9][0-9][-/,][0-9]*"

    isstart2 = (fields[1]) ? ms_globmatch (fields[1], INITDATEGLOB) : 0;
    isend3   = (fields[2]) ? ms_globmatch (fields[2], INITDATEGLOB) : 0;
    isstart6 = (fields[5]) ? ms_globmatch (fields[5], INITDATEGLOB) : 0;
    isend7   = (fields[6]) ? ms_globmatch (fields[6], INITDATEGLOB) : 0;

    /* Convert starttime to nstime_t */
    starttime = NSTUNSET;
    cp        = NULL;
    if (isstart2)
      cp = fields[1];
    else if (isstart6)
      cp = fields[5];

    if (cp)
    {
      starttime = ms_timestr2nstime (cp);

      /* Either sentinel means no usable time was produced */
      if (starttime == NSTUNSET || starttime == NSTERROR)
      {
        ms_log (2, "Cannot convert data selection start time (line %d): %s\n", linecount, cp);
        goto cleanup;
      }
    }

    /* Convert endtime to nstime_t */
    endtime = NSTUNSET;
    cp      = NULL;
    if (isend3)
      cp = fields[2];
    else if (isend7)
      cp = fields[6];

    if (cp)
    {
      endtime = ms_timestr2nstime (cp);

      /* Either sentinel means no usable time was produced */
      if (endtime == NSTUNSET || endtime == NSTERROR)
      {
        ms_log (2, "Cannot convert data selection end time (line %d): %s\n", linecount, cp);
        goto cleanup;
      }
    }

    /* Test for "SourceID [Starttime [Endtime [Pubversion]]]" */
    if (fieldidx == 1 ||
        (fieldidx == 2 && isstart2) ||
        (fieldidx == 3 && isstart2 && isend3) ||
        (fieldidx == 4 && isstart2 && isend3 && ms_isinteger (fields[3])))
    {
      /* Convert publication version to integer */
      pubversion = 0;
      if (fields[3])
      {
        long int longpver;

        longpver = strtol (fields[3], NULL, 10);

        if (longpver < 0 || longpver > 255)
        {
          ms_log (2, "Cannot convert publication version (line %d): %s\n", linecount, fields[3]);
          goto cleanup;
        }

        pubversion = (uint8_t)longpver;
      }

      /* Add selection to list */
      if (ms3_addselect (ppselections, fields[0], starttime, endtime, pubversion))
      {
        ms_log (2, "%s: Error adding selection on line %d\n", filename, linecount);
        goto cleanup;
      }
    }
    /* Test for "Network Station Location Channel [Quality [Starttime [Endtime]]]" */
    else if (fieldidx == 4 || fieldidx == 5 ||
             (fieldidx == 6 && isstart6) ||
             (fieldidx == 7 && isstart6 && isend7))
    {
      /* Convert quality field to publication version if integer */
      pubversion = 0;
      if (fields[4] && ms_isinteger (fields[4]))
      {
        long int longpver;

        longpver = strtol (fields[4], NULL, 10);

        if (longpver < 0 || longpver > 255)
        {
          ms_log (2, "Cannot convert publication version (line %d): %s\n", linecount, fields[4]);
          goto cleanup;
        }

        pubversion = (uint8_t)longpver;
      }

      if (ms3_addselect_comp (ppselections, fields[0], fields[1], fields[2], fields[3],
                              starttime, endtime, pubversion))
      {
        ms_log (2, "%s: Error adding selection on line %d\n", filename, linecount);
        goto cleanup;
      }
    }
    else
    {
      ms_log (1, "%s: Skipping unrecognized data selection on line %d\n", filename, linecount);

      /* Nothing was added for this line, so it must not be counted */
      continue;
    }

    selectcount++;
  }

  retval = selectcount;

cleanup:
  /* Close the input on every exit path, but never close stdin */
  if (fp != stdin)
    fclose (fp);

  return retval;
} /* End of ms3_readselectionsfile() */
/**********************************************************************/ /**
* @brief Free all memory associated with a ::MS3Selections
*
* All memory from one or more ::MS3Selections (in a linked list) are freed.
*
* @param[in] selections Start of ::MS3Selections to free
***************************************************************************/
void
ms3_freeselections (MS3Selections *selections)
{
  MS3Selections *entry = selections;

  while (entry)
  {
    /* Remember the links before the entry itself is released */
    MS3Selections *nextentry = entry->next;
    MS3SelectTime *window    = entry->timewindows;

    /* Release every time window attached to this entry */
    while (window)
    {
      MS3SelectTime *nextwindow = window->next;

      libmseed_memory.free (window);
      window = nextwindow;
    }

    libmseed_memory.free (entry);
    entry = nextentry;
  }
} /* End of ms3_freeselections() */
/**********************************************************************/ /**
* @brief Print the selections list using the ms_log() facility.
*
* All selections are printed with simple formatting.
*
* @param[in] selections Start of ::MS3Selections to print
***************************************************************************/
void
ms3_printselections (const MS3Selections *selections)
{
  const MS3Selections *entry;
  const MS3SelectTime *window;
  char startstr[50];
  char endstr[50];

  for (entry = selections; entry; entry = entry->next)
  {
    ms_log (0, "Selection: %s pubversion: %d\n",
            entry->sidpattern, entry->pubversion);

    /* One line per time window, with a placeholder for open-ended bounds */
    for (window = entry->timewindows; window; window = window->next)
    {
      if (window->starttime == NSTERROR || window->starttime == NSTUNSET)
        strncpy (startstr, "No start time", sizeof (startstr) - 1);
      else
        ms_nstime2timestr (window->starttime, startstr, ISOMONTHDAY_Z, NANO_MICRO_NONE);

      if (window->endtime == NSTERROR || window->endtime == NSTUNSET)
        strncpy (endstr, "No end time", sizeof (endstr) - 1);
      else
        ms_nstime2timestr (window->endtime, endstr, ISOMONTHDAY_Z, NANO_MICRO_NONE);

      ms_log (0, " %30s %30s\n", startstr, endstr);
    }
  }
} /* End of ms3_printselections() */
/***************************************************************************
 * ms_isinteger:
 *
 * Test a string for all digits, i.e. an unsigned decimal integer.
 * No sign, whitespace or other non-digit character is accepted.
 * An empty string contains no non-digit and is therefore reported
 * as an integer.
 *
 * Returns 1 if is integer and 0 otherwise.
 ***************************************************************************/
static int
ms_isinteger (const char *string)
{
  for (; *string; string++)
  {
    /* The cast keeps the argument in the range <ctype.h> requires, plain
     * char may be negative for bytes with the high bit set */
    if (!isdigit ((unsigned char)*string))
      return 0;
  }

  return 1;
}
/***********************************************************************
* robust glob pattern matcher
* ozan s. yigit/dec 1994
* public domain
*
* glob patterns:
* * matches zero or more characters
* ? matches any single character
* [set] matches any character in the set
* [^set] matches any character NOT in the set
* where a set is a group of characters or ranges. a range
* is written as two characters separated with a hyphen: a-z denotes
* all characters between a to z inclusive.
* [-set] set matches a literal hyphen and any character in the set
* []set] matches a literal close bracket and any character in the set
*
* char matches itself except where char is '*' or '?' or '['
* \char matches char, including any pattern character
*
* examples:
* a*c ac abc abbc ...
* a?c acc abc aXc ...
* a[a-z]c aac abc acc ...
* a[-a-z]c a-c aac abc ...
*
* Revision 1.4 2004/12/26 12:38:00 ct
* Changed function name (amatch -> globmatch), variables and
* formatting for clarity. Also add matching header globmatch.h.
*
* Revision 1.3 1995/09/14 23:24:23 oz
* removed boring test/main code.
*
* Revision 1.2 94/12/11 10:38:15 oz
* charset code fixed. it is now robust and interprets all
* variations of charset [i think] correctly, including [z-a] etc.
*
* Revision 1.1 94/12/08 12:45:23 oz
* Initial revision
***********************************************************************/
#define GLOBMATCH_TRUE 1
#define GLOBMATCH_FALSE 0
#define GLOBMATCH_NEGATE '^' /* std char set negation char */

/***********************************************************************
 * ms_globmatch:
 *
 * Check if a string matches a globbing pattern.  The whole string must
 * be consumed by the pattern for a match.
 *
 * Return 0 if string does not match pattern and non-zero otherwise.
 **********************************************************************/
static int
ms_globmatch (const char *string, const char *pattern)
{
  int inverted;
  int found;
  int pc;

  /* Each pass consumes one pattern element and one string character */
  for (; *pattern != '\0'; string++)
  {
    /* With the string exhausted only a '*' can still match */
    if (*string == '\0' && *pattern != '*')
      return GLOBMATCH_FALSE;

    pc = *pattern++;

    if (pc == '*')
    {
      /* Consecutive stars are equivalent to one */
      while (*pattern == '*')
        pattern++;

      /* A trailing star matches whatever is left of the string */
      if (*pattern == '\0')
        return GLOBMATCH_TRUE;

      /* When a plain character follows the star, skip ahead to its first
       * occurrence in the string before trying the remaining pattern */
      if (*pattern != '?' && *pattern != '[' && *pattern != '\\')
      {
        while (*string != '\0' && *string != *pattern)
          string++;
      }

      /* Try the remaining pattern at every remaining string position */
      for (; *string != '\0'; string++)
      {
        if (ms_globmatch (string, pattern))
          return GLOBMATCH_TRUE;
      }

      return GLOBMATCH_FALSE;
    }
    else if (pc == '?')
    {
      /* Any single character will do, but there must be one */
      if (*string == '\0')
        return GLOBMATCH_FALSE;
    }
    else if (pc == '[')
    {
      /* Character set.  Ranges are inclusive: [a-z] is a, z and
       * everything in between, so [z-a] may be interpreted as a set
       * that contains z, a and nothing in between. */
      inverted = (*pattern == GLOBMATCH_NEGATE) ? GLOBMATCH_TRUE : GLOBMATCH_FALSE;
      if (inverted)
        pattern++;

      found = GLOBMATCH_FALSE;

      while (!found && (pc = *pattern++))
      {
        /* A set must be closed before the pattern ends */
        if (!*pattern)
          return GLOBMATCH_FALSE;

        if (*pattern == '-') /* c-c */
        {
          if (!*++pattern)
            return GLOBMATCH_FALSE;

          if (*pattern != ']')
          {
            if (*string == pc || *string == *pattern ||
                (*string > pc && *string < *pattern))
              found = GLOBMATCH_TRUE;
          }
          else
          { /* c-] */
            if (*string >= pc)
              found = GLOBMATCH_TRUE;

            break;
          }
        }
        else /* cc or c] */
        {
          if (pc == *string)
            found = GLOBMATCH_TRUE;

          if (*pattern != ']')
          {
            if (*pattern == *string)
              found = GLOBMATCH_TRUE;
          }
          else
            break;
        }
      }

      /* A plain set must contain the character, a negated set must not */
      if (inverted == found)
        return GLOBMATCH_FALSE;

      /* Skip past the remainder of the set and its closing bracket */
      while (*pattern && *pattern != ']')
        pattern++;

      if (!*pattern++) /* set was never closed */
        return GLOBMATCH_FALSE;
    }
    else
    {
      /* A backslash makes the following pattern character literal; a
       * backslash at the very end of the pattern stands for itself */
      if (pc == '\\' && *pattern != '\0')
        pc = *pattern++;

      if (pc != *string)
        return GLOBMATCH_FALSE;
    }
  }

  /* Pattern exhausted: a match only if the string is exhausted as well */
  return !*string;
} /* End of ms_globmatch() */