diff --git a/Docs/build/html/Stages/055_documents2TTL/055_documents2TTL.html b/Docs/build/html/Stages/055_documents2TTL/055_documents2TTL.html index 6d3ba6b13..d972467cd 100644 --- a/Docs/build/html/Stages/055_documents2TTL/055_documents2TTL.html +++ b/Docs/build/html/Stages/055_documents2TTL/055_documents2TTL.html @@ -103,9 +103,9 @@ <tbody valign="top"> <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>data</strong> (<em>dict</em>) – ‘CDS’ part of the initial JSON</td> </tr> -<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">abstract</td> +<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">abstract or None if it was not found</td> </tr> -<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">str</td> +<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">str or NoneType</td> </tr> </tbody> </table> @@ -121,9 +121,9 @@ <tbody valign="top"> <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>data</strong> (<em>dict</em>) – ‘CDS’ part of the initial JSON</td> </tr> -<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">arXiv code</td> +<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">arXiv code or None if it was not found</td> </tr> -<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">str</td> +<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">str or NoneType</td> </tr> </tbody> </table> @@ -139,9 +139,9 @@ <tbody valign="top"> <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>data</strong> (<em>dict</em>) – ‘CDS’ part of the initial JSON</td> </tr> -<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">CDS id</td> +<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">CDS id or None if it was not found</td> </tr> -<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">int</td> +<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">int or NoneType</td> </tr> </tbody> </table> @@ -157,9 +157,9 @@ <tbody valign="top"> <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>data</strong> (<em>dict</em>) – ‘CDS’ part of the initial JSON</td> </tr> -<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">CDS internal report number</td> +<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">CDS internal report number or None if it was not found</td> </tr> -<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">unicode</td> +<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">unicode or NoneType</td> </tr> </tbody> </table> @@ -179,10 +179,10 @@ </ul> </td> </tr> -<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">parameter value</p> +<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">parameter value or None if it was not found</p> </td> </tr> -<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">int, str</p> +<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">int, str, NoneType</p> </td> </tr> </tbody> @@ -199,9 +199,9 @@ <tbody valign="top"> <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>data</strong> (<em>dict</em>) – ‘CDS’ part of the initial JSON</td> </tr> -<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">creation date</td> +<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">creation date or None if it was not found</td> </tr> -<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">str</td> +<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">str or NoneType</td> </tr> </tbody> </table> @@ -403,10 +403,10 @@ </ul> </td> </tr> -<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">parameter value</p> +<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">parameter value or None if it was not found</p> </td> </tr> -<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">str, unicode</p> +<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">str, unicode, NoneType</p> </td> </tr> </tbody> @@ -512,9 +512,9 @@ <tbody valign="top"> <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>data</strong> (<em>dict</em>) – ‘CDS’ part of the initial JSON</td> </tr> -<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">report number</td> +<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">report number or None if it was not found</td> </tr> -<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">unicode</td> +<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">unicode or NoneType</td> </tr> </tbody> </table> @@ -530,9 +530,9 @@ <tbody valign="top"> <tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>data</strong> (<em>dict</em>) – ‘CDS’ part of the initial JSON</td> </tr> -<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">title</td> +<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">title or None if it was not found</td> </tr> -<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">str</td> +<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">str or NoneType</td> </tr> </tbody> </table> diff --git a/Docs/build/html/_modules/055_documents2TTL/documents2ttl.html b/Docs/build/html/_modules/055_documents2TTL/documents2ttl.html index 148df665e..7633e5f9b 100644 --- a/Docs/build/html/_modules/055_documents2TTL/documents2ttl.html +++ b/Docs/build/html/_modules/055_documents2TTL/documents2ttl.html @@ -324,8 +324,8 @@ <h1>Source code for 055_documents2TTL.documents2ttl</h1><div class="highlight">< <span class="sd"> :param data: 'CDS' part of the initial JSON</span> <span class="sd"> :type data: dict</span> -<span class="sd"> :return: CDS internal report number</span> -<span class="sd"> :rtype: unicode</span> +<span class="sd"> :return: CDS internal report number or None if it was not found</span> +<span class="sd"> :rtype: unicode or NoneType</span> <span class="sd"> """</span> <span class="k">if</span> <span class="s1">'report_number'</span> <span class="ow">in</span> <span class="n">data</span><span class="p">:</span> <span class="n">report_number</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'report_number'</span><span class="p">)</span> @@ -347,8 +347,8 @@ <h1>Source code for 055_documents2TTL.documents2ttl</h1><div class="highlight">< <span class="sd"> :param data: 'CDS' part of the initial JSON</span> <span class="sd"> :type data: dict</span> -<span class="sd"> :return: report number</span> -<span class="sd"> :rtype: unicode</span> +<span class="sd"> :return: report number or None if it was not found</span> +<span class="sd"> :rtype: unicode or NoneType</span> <span class="sd"> """</span> <span class="k">if</span> <span class="s1">'report_number'</span> <span class="ow">in</span> <span class="n">data</span><span class="p">:</span> <span class="n">report_number</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'report_number'</span><span class="p">)</span> @@ -369,8 +369,8 @@ <h1>Source code for 055_documents2TTL.documents2ttl</h1><div class="highlight">< <span class="sd"> :param json_data: 'GLANCE' part of the initial JSON</span> <span class="sd"> :type json_data: dict</span> -<span class="sd"> :return: parameter value</span> -<span class="sd"> :rtype: str, unicode</span> +<span class="sd"> :return: parameter value or None if it was not found</span> +<span class="sd"> :rtype: str, unicode, NoneType</span> <span class="sd"> """</span> <span class="k">if</span> <span class="n">param_name</span> <span class="o">==</span> <span class="s1">'id'</span><span class="p">:</span> <span class="k">return</span> <span class="n">json_data</span><span class="p">[</span><span class="s1">'id'</span><span class="p">]</span> @@ -393,8 +393,8 @@ <h1>Source code for 055_documents2TTL.documents2ttl</h1><div class="highlight">< <span class="sd"> :type param_name: str</span> <span class="sd"> :param json_data: 'CDS' part of the initial JSON</span> -<span class="sd"> :return: parameter value</span> -<span class="sd"> :rtype: int, str</span> +<span class="sd"> :return: parameter value or None if it was not found</span> +<span class="sd"> :rtype: int, str, NoneType</span> <span class="sd"> """</span> <span class="k">if</span> <span class="n">param_name</span> <span class="o">==</span> <span class="s1">'abstract'</span><span class="p">:</span> <span class="k">return</span> <span class="n">abstract_extraction</span><span class="p">(</span><span class="n">json_data</span><span class="p">)</span> @@ -418,8 +418,8 @@ <h1>Source code for 055_documents2TTL.documents2ttl</h1><div class="highlight">< <span class="sd"> :param data: 'CDS' part of the initial JSON</span> <span class="sd"> :type data: dict</span> -<span class="sd"> :return: abstract</span> -<span class="sd"> :rtype: str</span> +<span class="sd"> :return: abstract or None if it was not found</span> +<span class="sd"> :rtype: str or NoneType</span> <span class="sd"> """</span> <span class="n">result</span> <span class="o">=</span> <span class="kc">None</span> <span class="k">if</span> <span class="s1">'abstract'</span> <span class="ow">in</span> <span class="n">data</span><span class="p">:</span> @@ -442,8 +442,8 @@ <h1>Source code for 055_documents2TTL.documents2ttl</h1><div class="highlight">< <span class="sd"> :param data: 'CDS' part of the initial JSON</span> <span class="sd"> :type data: dict</span> -<span class="sd"> :return: title</span> -<span class="sd"> :rtype: str</span> +<span class="sd"> :return: title or None if it was not found</span> +<span class="sd"> :rtype: str or NoneType</span> <span class="sd"> """</span> <span class="k">if</span> <span class="s1">'title'</span> <span class="ow">in</span> <span class="n">data</span><span class="p">:</span> <span class="k">return</span> <span class="n">fix_string</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'title'</span><span class="p">)</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'title'</span><span class="p">))</span></div> @@ -455,8 +455,8 @@ <h1>Source code for 055_documents2TTL.documents2ttl</h1><div class="highlight">< <span class="sd"> :param data: 'CDS' part of the initial JSON</span> <span class="sd"> :type data: dict</span> -<span class="sd"> :return: CDS id</span> -<span class="sd"> :rtype: int</span> +<span class="sd"> :return: CDS id or None if it was not found</span> +<span class="sd"> :rtype: int or NoneType</span> <span class="sd"> """</span> <span class="k">if</span> <span class="s1">'recid'</span> <span class="ow">in</span> <span class="n">data</span><span class="p">:</span> <span class="k">return</span> <span class="nb">int</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'recid'</span><span class="p">))</span></div> @@ -468,8 +468,8 @@ <h1>Source code for 055_documents2TTL.documents2ttl</h1><div class="highlight">< <span class="sd"> :param data: 'CDS' part of the initial JSON</span> <span class="sd"> :type data: dict</span> -<span class="sd"> :return: creation date</span> -<span class="sd"> :rtype: str</span> +<span class="sd"> :return: creation date or None if it was not found</span> +<span class="sd"> :rtype: str or NoneType</span> <span class="sd"> """</span> <span class="k">if</span> <span class="s1">'creation_date'</span> <span class="ow">in</span> <span class="n">data</span><span class="p">:</span> <span class="k">return</span> <span class="n">fix_string</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'creation_date'</span><span class="p">))</span></div> @@ -481,8 +481,8 @@ <h1>Source code for 055_documents2TTL.documents2ttl</h1><div class="highlight">< <span class="sd"> :param data: 'CDS' part of the initial JSON</span> <span class="sd"> :type data: dict</span> -<span class="sd"> :return: arXiv code</span> -<span class="sd"> :rtype: str</span> +<span class="sd"> :return: arXiv code or None if it was not found</span> +<span class="sd"> :rtype: str or NoneType</span> <span class="sd"> """</span> <span class="k">if</span> <span class="s1">'primary_report_number'</span> <span class="ow">in</span> <span class="n">data</span><span class="p">:</span> <span class="n">report_number</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'primary_report_number'</span><span class="p">)</span> diff --git a/Docs/build/html/searchindex.js b/Docs/build/html/searchindex.js index ecd19f662..4d3717298 100644 --- a/Docs/build/html/searchindex.js +++ b/Docs/build/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["Stages/055_documents2TTL/055_documents2TTL","Stages/Stages","index","pyDKB/pyDKB","pyDKB/pyDKB.common","pyDKB/pyDKB.common.LoggableObject","pyDKB/pyDKB.common.Type","pyDKB/pyDKB.common.custom_readline","pyDKB/pyDKB.common.exceptions","pyDKB/pyDKB.common.hdfs","pyDKB/pyDKB.common.json_utils","pyDKB/pyDKB.common.misc","pyDKB/pyDKB.common.types","pyDKB/pyDKB.dataflow","pyDKB/pyDKB.dataflow.cds","pyDKB/pyDKB.dataflow.communication","pyDKB/pyDKB.dataflow.communication.consumer","pyDKB/pyDKB.dataflow.communication.consumer.Consumer","pyDKB/pyDKB.dataflow.communication.consumer.FileConsumer","pyDKB/pyDKB.dataflow.communication.consumer.HDFSConsumer","pyDKB/pyDKB.dataflow.communication.consumer.StreamConsumer","pyDKB/pyDKB.dataflow.communication.messages","pyDKB/pyDKB.dataflow.communication.producer","pyDKB/pyDKB.dataflow.communication.producer.FileProducer","pyDKB/pyDKB.dataflow.communication.producer.HDFSProducer","pyDKB/pyDKB.dataflow.communication.producer.Producer","pyDKB/pyDKB.dataflow.communication.producer.StreamProducer","pyDKB/pyDKB.dataflow.communication.stream","pyDKB/pyDKB.dataflow.communication.stream.InputStream","pyDKB/pyDKB.dataflow.communication.stream.OutputStream","pyDKB/pyDKB.dataflow.communication.stream.Stream","pyDKB/pyDKB.dataflow.communication.stream.exceptions","pyDKB/pyDKB.dataflow.dkbID","pyDKB/pyDKB.dataflow.exceptions","pyDKB/pyDKB.dataflow.stage","pyDKB/pyDKB.dataflow.stage.AbstractStage","pyDKB/pyDKB.dataflow.stage.ProcessorStage","pyDKB/pyDKB.dataflow.types"],envversion:52,filenames:["Stages/055_documents2TTL/055_documents2TTL.rst","Stages/Stages.rst","index.rst","pyDKB/pyDKB.rst","pyDKB/pyDKB.common.rst","pyDKB/pyDKB.common.LoggableObject.rst","pyDKB/pyDKB.common.Type.rst","pyDKB/pyDKB.common.custom_readline.rst","pyDKB/pyDKB.common.exceptions.rst","pyDKB/pyDKB.common.hdfs.rst","pyDKB/pyDKB.common.json_utils.rst","pyDKB/pyDKB.common.misc.rst","pyDKB/pyDKB.common.types.rst","pyDKB/pyDKB.dataflow.rst","pyDKB/pyDKB.dataflow.cds.rst","pyDKB/pyDKB.dataflow.communication.rst","pyDKB/pyDKB.dataflow.communication.consumer.rst","pyDKB/pyDKB.dataflow.communication.consumer.Consumer.rst","pyDKB/pyDKB.dataflow.communication.consumer.FileConsumer.rst","pyDKB/pyDKB.dataflow.communication.consumer.HDFSConsumer.rst","pyDKB/pyDKB.dataflow.communication.consumer.StreamConsumer.rst","pyDKB/pyDKB.dataflow.communication.messages.rst","pyDKB/pyDKB.dataflow.communication.producer.rst","pyDKB/pyDKB.dataflow.communication.producer.FileProducer.rst","pyDKB/pyDKB.dataflow.communication.producer.HDFSProducer.rst","pyDKB/pyDKB.dataflow.communication.producer.Producer.rst","pyDKB/pyDKB.dataflow.communication.producer.StreamProducer.rst","pyDKB/pyDKB.dataflow.communication.stream.rst","pyDKB/pyDKB.dataflow.communication.stream.InputStream.rst","pyDKB/pyDKB.dataflow.communication.stream.OutputStream.rst","pyDKB/pyDKB.dataflow.communication.stream.Stream.rst","pyDKB/pyDKB.dataflow.communication.stream.exceptions.rst","pyDKB/pyDKB.dataflow.dkbID.rst","pyDKB/pyDKB.dataflow.exceptions.rst","pyDKB/pyDKB.dataflow.stage.rst","pyDKB/pyDKB.dataflow.stage.AbstractStage.rst","pyDKB/pyDKB.dataflow.stage.ProcessorStage.rst","pyDKB/pyDKB.dataflow.types.rst"],objects:{"":{pyDKB:[3,0,0,"-"]},"055_documents2TTL":{documents2ttl:[0,0,0,"-"]},"055_documents2TTL.documents2ttl":{abstract_extraction:[0,1,1,""],arxiv_extraction:[0,1,1,""],cds_id_extraction:[0,1,1,""],cds_internal_extraction:[0,1,1,""],cds_parameter_extraction:[0,1,1,""],creation_date_extraction:[0,1,1,""],define_globals:[0,1,1,""],document_cds:[0,1,1,""],document_glance:[0,1,1,""],document_links:[0,1,1,""],doi2ttl:[0,1,1,""],fix_list_values:[0,1,1,""],fix_string:[0,1,1,""],generate_journal_id:[0,1,1,""],get_document_iri:[0,1,1,""],glance_parameter_extraction:[0,1,1,""],keywords2ttl:[0,1,1,""],main:[0,1,1,""],process:[0,1,1,""],process_journals:[0,1,1,""],report_number_extraction:[0,1,1,""],title_extraction:[0,1,1,""]},"pyDKB.common":{LoggableObject:[5,0,0,"-"],Type:[6,0,0,"-"],custom_readline:[7,0,0,"-"],exceptions:[8,0,0,"-"],hdfs:[9,0,0,"-"],json_utils:[10,0,0,"-"],misc:[11,0,0,"-"],types:[12,0,0,"-"]},"pyDKB.common.LoggableObject":{LoggableObject:[5,2,1,""]},"pyDKB.common.LoggableObject.LoggableObject":{log:[5,3,1,""]},"pyDKB.common.Type":{Type:[6,2,1,""]},"pyDKB.common.Type.Type":{add:[6,4,1,""],hasMember:[6,4,1,""],member:[6,4,1,""],memberName:[6,4,1,""]},"pyDKB.common.custom_readline":{custom_readline:[7,1,1,""]},"pyDKB.common.exceptions":{HDFSException:[8,5,1,""]},"pyDKB.common.hdfs":{File:[9,1,1,""],basename:[9,1,1,""],check_stderr:[9,1,1,""],dirname:[9,1,1,""],getfile:[9,1,1,""],join:[9,1,1,""],listdir:[9,1,1,""],makedirs:[9,1,1,""],movefile:[9,1,1,""],putfile:[9,1,1,""]},"pyDKB.common.json_utils":{nestedKeys:[10,1,1,""],valueByKey:[10,1,1,""]},"pyDKB.common.misc":{log:[11,1,1,""]},"pyDKB.dataflow":{cds:[14,0,0,"-"],communication:[15,0,0,"-"],dkbID:[32,0,0,"-"],exceptions:[33,0,0,"-"],stage:[34,0,0,"-"],types:[37,0,0,"-"]},"pyDKB.dataflow.cds":{CDSInvenioConnector:[14,2,1,""],KerberizedCDSInvenioConnector:[14,2,1,""]},"pyDKB.dataflow.cds.CDSInvenioConnector":{"delete":[14,4,1,""],handlers:[14,6,1,""],kill:[14,4,1,""],orig_handlers:[14,6,1,""]},"pyDKB.dataflow.communication":{Message:[15,1,1,""],consumer:[16,0,0,"-"],messages:[21,0,0,"-"],producer:[22,0,0,"-"],stream:[27,0,0,"-"]},"pyDKB.dataflow.communication.consumer":{Consumer:[17,0,0,"-"],ConsumerBuilder:[16,2,1,""],FileConsumer:[18,0,0,"-"],HDFSConsumer:[19,0,0,"-"],StreamConsumer:[20,0,0,"-"]},"pyDKB.dataflow.communication.consumer.Consumer":{Consumer:[17,2,1,""],ConsumerException:[17,5,1,""]},"pyDKB.dataflow.communication.consumer.Consumer.Consumer":{close:[17,4,1,""],config:[17,6,1,""],get_message:[17,4,1,""],get_source_info:[17,4,1,""],get_stream:[17,4,1,""],init_stream:[17,4,1,""],message_class:[17,4,1,""],message_type:[17,6,1,""],next:[17,4,1,""],reconfigure:[17,4,1,""],reset_stream:[17,4,1,""],set_message_type:[17,4,1,""],stream_is_readable:[17,4,1,""]},"pyDKB.dataflow.communication.consumer.ConsumerBuilder":{build:[16,4,1,""],consumerClass:[16,6,1,""],setSource:[16,4,1,""],setType:[16,4,1,""]},"pyDKB.dataflow.communication.consumer.FileConsumer":{FileConsumer:[18,2,1,""]},"pyDKB.dataflow.communication.consumer.FileConsumer.FileConsumer":{current_file:[18,6,1,""],get_source:[18,4,1,""],get_source_info:[18,4,1,""],init_sources:[18,4,1,""],next_source:[18,4,1,""],reconfigure:[18,4,1,""],source_is_readable:[18,4,1,""]},"pyDKB.dataflow.communication.consumer.HDFSConsumer":{HDFSConsumer:[19,2,1,""]},"pyDKB.dataflow.communication.consumer.HDFSConsumer.HDFSConsumer":{reconfigure:[19,4,1,""]},"pyDKB.dataflow.communication.consumer.StreamConsumer":{StreamConsumer:[20,2,1,""]},"pyDKB.dataflow.communication.consumer.StreamConsumer.StreamConsumer":{fd:[20,6,1,""],get_source:[20,4,1,""],get_source_info:[20,4,1,""],next_source:[20,4,1,""],reconfigure:[20,4,1,""]},"pyDKB.dataflow.communication.messages":{AbstractMessage:[21,2,1,""],DecodeUnknownType:[21,5,1,""],EncodeUnknownType:[21,5,1,""],JSONMessage:[21,2,1,""],Message:[21,1,1,""],TTLMessage:[21,2,1,""]},"pyDKB.dataflow.communication.messages.AbstractMessage":{content:[21,4,1,""],decode:[21,4,1,""],decoded:[21,6,1,""],encode:[21,4,1,""],encoded:[21,6,1,""],extension:[21,3,1,""],getOriginal:[21,4,1,""],incompl:[21,6,1,""],incomplete:[21,4,1,""],msg_type:[21,6,1,""],native_types:[21,6,1,""],typeName:[21,3,1,""]},"pyDKB.dataflow.communication.messages.JSONMessage":{decode:[21,4,1,""],encode:[21,4,1,""],incompl_key:[21,6,1,""],incomplete:[21,4,1,""],msg_type:[21,6,1,""],native_types:[21,6,1,""]},"pyDKB.dataflow.communication.messages.TTLMessage":{decode:[21,4,1,""],encode:[21,4,1,""],msg_type:[21,6,1,""],native_types:[21,6,1,""]},"pyDKB.dataflow.communication.producer":{FileProducer:[23,0,0,"-"],HDFSProducer:[24,0,0,"-"],Producer:[25,0,0,"-"],ProducerBuilder:[22,2,1,""],StreamProducer:[26,0,0,"-"]},"pyDKB.dataflow.communication.producer.FileProducer":{FileProducer:[23,2,1,""]},"pyDKB.dataflow.communication.producer.FileProducer.FileProducer":{close:[23,4,1,""],close_file:[23,4,1,""],config_dir:[23,4,1,""],current_file:[23,6,1,""],default_dir:[23,4,1,""],dirname:[23,4,1,""],ensure_dir:[23,4,1,""],file_info:[23,4,1,""],get_dest:[23,4,1,""],get_dest_info:[23,4,1,""],get_dir:[23,4,1,""],get_filename:[23,4,1,""],get_source_info:[23,4,1,""],reconfigure:[23,4,1,""],reset_file:[23,4,1,""],set_default_dir:[23,4,1,""],subdir:[23,4,1,""]},"pyDKB.dataflow.communication.producer.HDFSProducer":{HDFSProducer:[24,2,1,""]},"pyDKB.dataflow.communication.producer.HDFSProducer.HDFSProducer":{close_file:[24,4,1,""],config_dir:[24,4,1,""],ensure_dir:[24,4,1,""],file_info:[24,4,1,""],set_default_dir:[24,4,1,""],subdir:[24,4,1,""]},"pyDKB.dataflow.communication.producer.Producer":{Producer:[25,2,1,""],ProducerException:[25,5,1,""]},"pyDKB.dataflow.communication.producer.Producer.Producer":{close:[25,4,1,""],config:[25,6,1,""],drop:[25,4,1,""],eop:[25,4,1,""],flush:[25,4,1,""],get_dest:[25,4,1,""],get_dest_info:[25,4,1,""],get_stream:[25,4,1,""],init_stream:[25,4,1,""],message_class:[25,4,1,""],message_type:[25,6,1,""],reconfigure:[25,4,1,""],reset_stream:[25,4,1,""],set_message_type:[25,4,1,""],write:[25,4,1,""]},"pyDKB.dataflow.communication.producer.ProducerBuilder":{build:[22,4,1,""],message_type:[22,6,1,""],producerClass:[22,6,1,""],setDest:[22,4,1,""],setSourceInfoMethod:[22,4,1,""],setType:[22,4,1,""],src_info:[22,6,1,""]},"pyDKB.dataflow.communication.producer.StreamProducer":{StreamProducer:[26,2,1,""]},"pyDKB.dataflow.communication.producer.StreamProducer.StreamProducer":{fd:[26,6,1,""],get_dest:[26,4,1,""],get_dest_info:[26,4,1,""],reconfigure:[26,4,1,""]},"pyDKB.dataflow.communication.stream":{InputStream:[28,0,0,"-"],OutputStream:[29,0,0,"-"],Stream:[30,0,0,"-"],StreamBuilder:[27,2,1,""],exceptions:[31,0,0,"-"]},"pyDKB.dataflow.communication.stream.InputStream":{InputStream:[28,2,1,""],get_message:[27,4,1,""],is_readable:[27,4,1,""],next:[27,4,1,""],parse_message:[27,4,1,""],reset:[27,4,1,""]},"pyDKB.dataflow.communication.stream.InputStream.InputStream":{get_message:[28,4,1,""],is_readable:[28,4,1,""],next:[28,4,1,""],parse_message:[28,4,1,""],reset:[28,4,1,""]},"pyDKB.dataflow.communication.stream.OutputStream":{OutputStream:[29,2,1,""],configure:[27,4,1,""],drop:[27,4,1,""],eop:[27,4,1,""],flush:[27,4,1,""],msg_buffer:[27,6,1,""],write:[27,4,1,""]},"pyDKB.dataflow.communication.stream.OutputStream.OutputStream":{configure:[29,4,1,""],drop:[29,4,1,""],eop:[29,4,1,""],flush:[29,4,1,""],msg_buffer:[29,6,1,""],write:[29,4,1,""]},"pyDKB.dataflow.communication.stream.Stream":{EOM:[27,6,1,""],Stream:[30,2,1,""],close:[27,4,1,""],configure:[27,4,1,""],get_fd:[27,4,1,""],message_type:[27,4,1,""],reset:[27,4,1,""],set_message_type:[27,4,1,""]},"pyDKB.dataflow.communication.stream.Stream.Stream":{EOM:[30,6,1,""],close:[30,4,1,""],configure:[30,4,1,""],get_fd:[30,4,1,""],message_type:[30,4,1,""],reset:[30,4,1,""],set_message_type:[30,4,1,""]},"pyDKB.dataflow.communication.stream.StreamBuilder":{build:[27,4,1,""],message_type:[27,6,1,""],setStream:[27,4,1,""],setType:[27,4,1,""],streamClass:[27,6,1,""]},"pyDKB.dataflow.communication.stream.exceptions":{StreamException:[31,5,1,""]},"pyDKB.dataflow.dkbID":{dkbID:[32,1,1,""]},"pyDKB.dataflow.exceptions":{DataflowException:[33,5,1,""]},"pyDKB.dataflow.stage":{AbstractStage:[35,0,0,"-"],ProcessorStage:[36,0,0,"-"]},"pyDKB.dataflow.stage.AbstractStage":{AbstractStage:[35,2,1,""]},"pyDKB.dataflow.stage.AbstractStage.AbstractStage":{add_argument:[35,4,1,""],args_error:[35,4,1,""],config_error:[35,4,1,""],defaultArguments:[35,4,1,""],log_configuration:[35,4,1,""],output_error:[35,4,1,""],parse_args:[35,4,1,""],print_usage:[35,4,1,""],read_config:[35,4,1,""],run:[35,4,1,""],set_error:[35,4,1,""],stop:[35,4,1,""]},"pyDKB.dataflow.stage.ProcessorStage":{ProcessorStage:[36,2,1,""],clear_buffer:[34,4,1,""],configure:[34,4,1,""],defaultArguments:[34,4,1,""],flush_buffer:[34,4,1,""],forward:[34,4,1,""],get_source_info:[34,4,1,""],input:[34,4,1,""],input_message_class:[34,4,1,""],output:[34,4,1,""],output_message_class:[34,4,1,""],process:[34,7,1,""],run:[34,4,1,""],set_input_message_type:[34,4,1,""],set_output_message_type:[34,4,1,""],skip_process:[34,7,1,""],stop:[34,4,1,""]},"pyDKB.dataflow.stage.ProcessorStage.ProcessorStage":{clear_buffer:[36,4,1,""],configure:[36,4,1,""],defaultArguments:[36,4,1,""],flush_buffer:[36,4,1,""],forward:[36,4,1,""],get_source_info:[36,4,1,""],input:[36,4,1,""],input_message_class:[36,4,1,""],output:[36,4,1,""],output_message_class:[36,4,1,""],process:[36,7,1,""],run:[36,4,1,""],set_input_message_type:[36,4,1,""],set_output_message_type:[36,4,1,""],skip_process:[36,7,1,""],stop:[36,4,1,""]},pyDKB:{common:[4,0,0,"-"],dataflow:[13,0,0,"-"]}},objnames:{"0":["py","module","Python module"],"1":["py","function","Python function"],"2":["py","class","Python class"],"3":["py","classmethod","Python class method"],"4":["py","method","Python method"],"5":["py","exception","Python exception"],"6":["py","attribute","Python attribute"],"7":["py","staticmethod","Python static method"]},objtypes:{"0":"py:module","1":"py:function","2":"py:class","3":"py:classmethod","4":"py:method","5":"py:exception","6":"py:attribute","7":"py:staticmethod"},terms:{"055_documents2ttl":0,"abstract":[0,6,21,27,30,34,35,36],"boolean":3,"case":[11,14,34,35,36],"class":[5,6,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,34,35,36],"default":[3,23,24,34,36],"final":[34,36],"function":[0,3,7,11,34,36],"import":[3,7],"int":[0,6],"new":[3,17,18,21,25,27,28],"return":[0,3,6,7,9,10,15,16,17,18,20,21,22,23,24,25,26,27,28,30,32,34,35,36],"static":[18,23,24,34,36],"true":[0,3,6,7,14,17,18,25,27,28,30,34,35,36],"try":[9,25],CDS:0,For:[3,21],That:3,The:[0,34,36],There:3,__config:35,__input:[34,36],__iter:[27,28],__main__:3,__name__:3,__output:[34,36],__parser:35,__stoppabl:[34,36],_cds_attr:0,_err:35,_incomplet:21,_message_typ:3,about:[3,14,18,20,23,24,26,27,29,34,36],abstract_extract:0,abstractmessag:[21,34,36],abstractprocessorstag:0,abstractstag:[3,13,34,36],accept:0,accord:[3,19,23,34,36],achiev:[34,36],across:12,action:[34,36],actual:[7,25],add:[3,6,27,29,35],add_argu:35,added:6,addit:[11,21,34,36],all:[3,5,9,11,12,17,34,35,36],allow:[14,34,36],also:3,ancestor:5,anew:[34,36],ani:[0,3,11,23],anoth:7,anywher:[27,29],appl:6,appli:0,applic:3,apreduc:36,arg:[0,6,9,11,14,34,35,36],argpars:[0,35],args_error:35,argument:[0,3,6,17,25,34,35,36],argumentpars:35,argv:[0,3],arxiv:0,arxiv_extract:0,atla:0,author:0,avoid:[3,7],awar:14,base:[0,3,5,6,8,14,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36],base_dir:[23,24],basenam:9,believ:20,belong:11,better:[18,23,24],between:[10,11],bodi:11,bool:[0,6,17,18,21,27,28,34,36],browser:14,buffer:[25,27,29,34,36],build:[16,22,27],call:[0,3,7,34,36],can:[3,7,36],cds:[3,13],cds_attr:0,cds_id_extract:0,cds_internal_extract:0,cds_parameter_extract:0,cdsinvenioconnector:14,chain:10,chang:[34,36],check:[3,6,7,17,18,21,27,28],check_stderr:9,citi:3,classmethod:[5,21],clear_buff:[34,36],close:[14,17,23,24,25,27,28,30],close_fil:[23,24],cls:21,code:[0,3,9,21,35],com:7,combin:3,command:[0,3,34,36],common:[3,17,18,23,24,25,27,30,35],commuic:30,commun:[3,7,13,34,36],compat:3,config:[16,17,18,19,20,22,23,24,25,26,27,28,29,30,34,35,36],config_dir:[23,24],config_error:35,configpars:35,configur:[3,18,19,20,23,24,26,27,29,30,34,35,36],confirm:21,connect:0,connector:3,consider:0,construct:[0,7,16,22,23,24],constructor:[6,16,22,27],consum:[3,13,15,34,36],consumerbuild:16,consumerclass:16,consumerexcept:17,contain:[0,10,11],containspubl:0,content:[0,2,3,21],context:3,contrib:14,convert:0,copi:9,correspond:[3,21,23],creat:[3,9,27],creation:[0,3],creation_date_extract:0,current:[0,3,9,17,18,20,21,22,23,24,25,26,27,28,34,36],current_fil:[18,23],currenti:20,custom:[7,35],custom_readlin:[3,4],data:[0,3,7,16,17,18,19,20,21,22,23,24,25,26,32,34,36],data_typ:32,dataflow:[0,3,7],dataflowexcept:[17,25,31,33],date:[0,3],decid:3,decod:[3,21],decodeunknowntyp:21,def:3,default_dir:23,defaultargu:[34,35,36],defaultdict:35,defin:[0,18],define_glob:0,definit:[6,8,12,21,33,34,35,36,37],del:14,delet:14,delimet:7,depend:35,descript:[34,35,36],descriptor:[18,20,23,26,27,28,30,34,36],dest:[3,9,22,23,24,25,26,36],destin:[3,22,23,25],detail:3,develop:3,dfs:36,dict:[0,3,10,21],dir:[23,36],directori:[9,23,24,36],dirnam:[9,23],dkb:[0,3,7,32,33,34,35,36],dkbid:[0,3,13],doc_id:0,doc_iri:0,document:[0,3],document_cd:0,document_gl:0,document_link:0,documents2ttl:0,doe:6,doesn:0,doi2ttl:0,doi:0,dot:10,download:9,drop:[3,25,27,29,34,36],each:0,easi:3,elif:6,els:25,empti:[11,17,18,27,28],encod:[3,21],encodeunknowntyp:21,end:[9,27,29],ensur:[23,24],ensure_dir:[23,24],eom:[27,30],eop:[25,27,29],eopmark:[34,36],equival:36,err_trac:35,err_typ:35,err_val:35,error:[7,35],escap:0,etc:[20,26,27,30,34,36],etl:3,even:[20,26],everi:[3,7,34,36],exampl:[0,3,6],exc_info:35,except:[3,4,13,15,17,21,25,27,30],execut:3,exist:[6,23,24],exit:[34,35,36],expect:[34,36],extend:14,extens:21,extract:[0,3],fail:[3,17,27,28,34,35,36],failur:[34,36],fals:[3,6,14,17,18,27,28,34,35,36],fcntl:7,field:[21,34,36],file:[0,3,7,9,16,18,20,21,22,23,24,26,27,28,30,34,35,36],file_info:[23,24],fileconsum:[3,13,15,16,19,20],filenam:[9,23],fileproduc:[3,13,15,22,24,26],filter:3,finish:[34,36],first:[3,11,34,36],fix:0,fix_list_valu:0,fix_str:0,flow:[3,7],flush:[25,27,29,34,36],flush_buff:[34,36],fname:9,follow:[0,3],forc:[27,28],formal:21,format:[3,21,34,36],forward:[34,36],found:7,frame:14,from:[0,3,7,9,17,18,21,23,24,27,28,34,35,36],full:[3,23,24],fulli:[3,18],further:0,futur:3,gener:[0,3,7,32,34,36],generate_journal_id:0,get:[0,3,9,17,18,20,21,22,23,25,26,27,28,30,34,36],get_dest:[23,25,26],get_dest_info:[23,25,26],get_dir:23,get_document_iri:0,get_fd:[27,30],get_filenam:23,get_messag:[17,27,28],get_sourc:[18,20],get_source_info:[17,18,20,23,34,36],get_stream:[17,25],getfil:9,getorigin:21,github:7,given:[5,9,11],glanc:0,glance_attr:0,glance_parameter_extract:0,global:0,graph:0,gssapi:14,guid:2,handl:7,handler:[3,14],has:0,hasabstract:0,hasarxivcod:0,hascds_id:0,hascdsintern:0,hascdsreportnumb:0,hascreationd:0,hasdoi:0,hasfulltitl:0,hasglance_id:0,hash:23,haskeyword:0,haslabel:0,hasmemb:6,hasrefcod:0,hasshorttitl:0,hastitl:0,hasurl:0,hasvolum:0,hasyear:0,have:[0,3],hdf:[3,4,8,18,19,24,36],hdfsconsum:[3,13,15,16],hdfsexcept:8,hdfsproduc:[3,13,15,22],hook:0,http:7,identifi:32,ignor:11,ile:36,iles:36,implement:[0,3,7,17,18,19,20,21,23,24,25,26,27,28,29,34,36],includ:23,incompl:21,incompl_kei:21,incomplet:[7,21,34,36],independ:3,index:2,indic:3,individu:[34,36],info:[17,18,20,22,23,25,26,35],inform:[3,34,35,36],init:[16,17,22,25,34],init_sourc:18,init_stream:[17,25],initi:[0,17,18,25,27,28],input:[0,3,7,17,20,25,27,28,30,34,36],input_data:3,input_messag:[34,36],input_message_class:[34,36],inputstream:[3,13,15,17,27],insid:[34,36],instanc:[0,3,16,22,27,29,34,35,36],instead:7,integr:3,intend:3,interact:9,intern:[0,3],interrupt:7,invenio_cli:14,iobas:[27,30],iri:0,is_read:[27,28],isbasedon:0,issu:0,item:0,iter:[7,18,34,36],its:[0,9,34,36],itself:10,jame:3,john:3,join:9,jonathan:3,journal:0,journal_dict:0,journal_issu:0,journalissu:0,json:[0,3,10,21],json_data:[0,10,32],json_util:[3,4],jsonmessag:[3,21],jsonprocessorstag:3,just:[34,36],kafka:3,keep:[3,34,36],kei:[3,10],kerberizedcdsinvenioconnector:14,kerbero:14,keyword:0,keywords2ttl:0,kill:14,knowledg:3,known:25,kwarg:35,languag:3,last:[7,25,35],left:[7,17,18,27,28],let:3,level:[5,11],librari:[3,7,12,37],like:[7,20,26,34,36],line:[0,3,7,11,34,36],link:[0,17,25],list:[0,3,9,10,11,34,36],list_val:0,listdir:9,load:3,local:[9,23,36],local_path:23,log:[5,11,35],log_configur:35,loggableobject:[3,4,17,25,27,30,35],logic:3,login:14,loglevel:[5,11],look:0,mai:[7,34,36],main:0,make:7,makedir:9,manag:3,mani:3,mark:[10,34,36],marker:[21,25],matter:21,max_lin:9,mean:[0,3],meaning:32,mechan:14,member:[5,6,11],membernam:6,merg:[20,26],messag:[0,3,5,7,11,13,15,16,17,22,25,27,28,29,30,34,35,36],message_class:[17,25],message_typ:[17,22,25,27,30],messagetyp:3,metadata:[0,3,23,24],method:[5,7,22,27,28,34,36],middl:7,mint:3,misc:[3,4],miscellani:11,mode:[3,9,34,35,36],modul:[0,2,3,4,13,15,16,22,27,34],more:7,most:14,move:[7,9,24],movefil:9,msg:[0,25],msg_buffer:[27,29],msg_type:[15,21,27,30],multilin:11,multipl:[20,23,24,26],must:[3,10,34,36],my_process:3,mytyp:6,name:[0,3,6,7,9,20,21,23,24,26,36],namespac:[0,35],native_typ:21,ndjson:3,nearest:18,necessari:[34,36],need:[3,5,35],negoti:14,nest:10,nestedkei:10,newlin:7,next:[7,11,17,18,27,28],next_sourc:[18,20],node:3,non:[0,18],none:[9,16,17,18,20,21,22,23,25,26,27,28,29,30,34,35,36],nonetyp:[17,18,21,27,28],note:[34,36],note_cds_attr:0,note_glance_attr:0,noth:7,notimplementederror:21,now:3,number:[0,20,26],object:[0,3,5,6,7,9,10,11,16,17,21,22,27,28,30,32,34,35,36,37],onc:3,one:[7,14,20,23],ones:3,onli:[3,9,11,20],ontolog:0,open:[9,17,23,25,27,30,35],oper:[3,27,30,31],option:[7,36],orang:6,organ:13,orig_handl:14,origin:21,other:[0,11],our:3,out:3,out_data:3,out_messag:3,output:[0,3,5,9,11,23,24,25,27,29,30,34,35,36],output_error:35,output_messag:[34,36],output_message_class:[34,36],outputstream:[3,13,15,27],over:7,overrid:[27,28],packag:2,pandawm:7,paper:0,paper_cds_attr:0,paper_glance_attr:0,parallel:[23,24],param_nam:0,paramet:[0,5,6,7,9,10,11,19,21,25,27,30,34,35,36],parent:[9,27,28],pars:[0,3,17,27,28,34,35,36],parse_arg:35,parse_messag:[27,28],parser:[34,35,36],part:[0,3,34,36],pass:[3,6,15,21,35],password:14,path:[9,23,24],perform:[3,34,36],pipe:[18,20,23,24,26],pipelin:3,pleas:3,plum:6,point:[18,23,24],popul:3,posit:6,prefer:23,prefix:11,prehap:[20,26],prepar:[34,36],prepend:11,previou:[3,21,27,30],print:[6,35],print_usag:35,proc:9,process:[0,3,27,29,34,35,36],process_journ:0,processor:[3,34,36],processorstag:[0,3,13,34],produc:[3,13,15],producerbuild:22,producerclass:22,producerexcept:25,program:3,propag:14,properti:[27,28],protect:14,provid:11,pull:7,put:[10,25,34,36],putfil:9,pydkb:[0,2],python:3,quickstart:2,quotat:10,rais:[7,21,27,30],reach:3,read:[7,17,18,35],read_config:35,readabl:[7,17,18,27,28],readi:3,readlin:7,real:25,receiv:0,reconfigur:[17,18,19,20,23,25,26],record:11,rel:36,remov:23,replac:11,report:0,report_number_extract:0,repres:[11,14],requir:[34,36],reset:[17,18,21,23,25,27,28,30],reset_fil:23,reset_stream:[17,25],restore_handl:14,result:0,rethink:7,retriev:6,retrun:[27,28],rewritten:36,right:3,robin:18,role:3,round:18,run:[0,3,14,34,35,36],runtimeerror:8,safeconfigpars:35,same:[11,14],sampl:3,seamless:[34,36],search:[2,10],section:3,semant:[34,36],send:[7,9,27,29,34,36],sentenc:0,separ:[7,10],sequenc:0,set:[3,16,17,21,22,23,24,25,27,30,34,35,36],set_:3,set_default_dir:[23,24],set_error:35,set_input_message_typ:[3,34,36],set_message_typ:[17,25,27,30],set_output_message_typ:[3,34,36],setdest:22,setsourc:16,setsourceinfomethod:22,setstream:27,settyp:[16,22,27],should:[0,3],signal:[14,27,29],signum:14,simpl:[3,14],simpliest:[34,36],simplifi:3,singl:[3,20,21,26,34,36],sink:3,sketchi:3,skip:[34,36],skip_process:[34,36],smallest:3,smith:3,some:[0,21,34,36],sourc:[0,3,5,6,7,8,9,10,11,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36],source_is_read:18,special:11,specif:[0,21,35],specifi:[34,36],spnego:14,src_info:22,stage:[2,3,13,17,18,25],standalon:3,start:3,statement:[3,21],statu:21,stderr:[9,35],step:3,still:0,stop:[34,35,36],stopiter:7,storag:3,str:[0,5,6,7,21,35],stream:[3,13,15,17,20,25,26,34,36],stream_is_read:17,streambuild:27,streamclass:27,streamconsum:[3,13,15,16,18],streamexcept:31,streamproduc:[3,13,15,22,23,24],strict:7,string:[0,6,10,11,21],structur:0,sub_dir:[23,24],subdir:[23,24],subdirectori:9,submodul:[3,7],subpackag:2,subprocess:9,success:[3,34,36],successfulli:[34,36],supervis:3,supervisor:[3,27,29],support:[0,3],supporting_docu:0,supporting_not:0,supportingdocu:0,symbol:11,sys:3,system:3,take:[3,21,34,36],taken:[3,36],task:0,tdin:36,tdout:[3,36],temporari:[9,23],temporaryfil:9,term:3,text:21,thei:11,them:3,thi:[0,3,7,14,20,21,26,34,36],think:[18,20,23,24,26],three:3,through:3,thrown:21,till:9,time:3,timeout:9,timestamp:[11,23],titl:0,title_extract:0,todo:[0,18,20,21,23,24,26,27,30],topolog:3,traceback:35,transform:[3,10,34,36],tream:[34,36],ttl:[0,21],ttlmessag:21,turn:[3,34,36],two:3,type:[0,3,4,5,7,11,13,15,16,17,18,21,22,25,27,28,30,32,34,36],typenam:21,unbroken:[34,36],unchang:0,under:[0,7],unicod:[0,21],uniqu:32,unit:3,until:7,updat:18,upload:9,usag:[0,35,36],use:25,used:[3,7,12],user:[3,14],util:[9,10,11,13,32],val:6,valu:[0,3,6,7,9,10,17,18,21,27,28,34,36],valuebykei:10,valueerror:21,variabl:[0,34,35,36],verifi:[27,28],version:3,via:[0,14],volum:0,wai:[34,36],wait:9,walnut:6,welcom:3,were:3,what:7,when:[7,21],where:[15,21,36],which:[3,14],whitespac:11,window:7,wip:3,without:[3,7,9,25,27,29,34,36],work:[3,10],worker:3,workflow:0,write:[3,7,25,27,29],written:3,wrong:6,wrong_str:0,xxx:[15,21],xxxmessag:[15,21],year:0,yet:[18,32],yield:7,york:3,your:3},titles:["Stage 055","Stages","Welcome to Data Knowledge Base documentation pages","pyDKB package","pyDKB.common package","pyDKB.common.LoggableObject module","pyDKB.common.Type module","pyDKB.common.custom_readline module","pyDKB.common.exceptions module","pyDKB.common.hdfs module","pyDKB.common.json_utils module","pyDKB.common.misc module","pyDKB.common.types module","pyDKB.dataflow package","pyDKB.dataflow.cds module","pyDKB.dataflow.communication package","pyDKB.dataflow.communication.consumer package","pyDKB.dataflow.communication.consumer.Consumer module","pyDKB.dataflow.communication.consumer.FileConsumer module","pyDKB.dataflow.communication.consumer.HDFSConsumer module","pyDKB.dataflow.communication.consumer.StreamConsumer module","pyDKB.dataflow.communication.messages module","pyDKB.dataflow.communication.producer package","pyDKB.dataflow.communication.producer.FileProducer module","pyDKB.dataflow.communication.producer.HDFSProducer module","pyDKB.dataflow.communication.producer.Producer module","pyDKB.dataflow.communication.producer.StreamProducer module","pyDKB.dataflow.communication.stream package","pyDKB.dataflow.communication.stream.InputStream module","pyDKB.dataflow.communication.stream.OutputStream module","pyDKB.dataflow.communication.stream.Stream module","pyDKB.dataflow.communication.stream.exceptions module","pyDKB.dataflow.dkbID module","pyDKB.dataflow.exceptions module","pyDKB.dataflow.stage package","pyDKB.dataflow.stage.AbstractStage module","pyDKB.dataflow.stage.ProcessorStage module","pyDKB.dataflow.types module"],titleterms:{abstractstag:35,base:2,cds:14,common:[4,5,6,7,8,9,10,11,12],commun:[15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],consum:[16,17,18,19,20],custom_readlin:7,data:2,dataflow:[13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37],dkbid:32,document:2,except:[8,31,33],fileconsum:18,fileproduc:23,guid:3,hdf:9,hdfsconsum:19,hdfsproduc:24,indic:2,inputstream:28,json_util:10,knowledg:2,loggableobject:5,messag:21,misc:11,modul:[5,6,7,8,9,10,11,12,14,17,18,19,20,21,23,24,25,26,28,29,30,31,32,33,35,36,37],outputstream:29,packag:[3,4,13,15,16,22,27,34],page:2,processorstag:36,produc:[22,23,24,25,26],pydkb:[3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37],quickstart:3,stage:[0,1,34,35,36],stream:[27,28,29,30,31],streamconsum:20,streamproduc:26,submodul:[4,13,15,16,22,27,34],subpackag:[3,13,15],tabl:2,todo:7,type:[6,12,37],welcom:2}}) \ No newline at end of file +Search.setIndex({docnames:["Stages/055_documents2TTL/055_documents2TTL","Stages/Stages","index","pyDKB/pyDKB","pyDKB/pyDKB.common","pyDKB/pyDKB.common.LoggableObject","pyDKB/pyDKB.common.Type","pyDKB/pyDKB.common.custom_readline","pyDKB/pyDKB.common.exceptions","pyDKB/pyDKB.common.hdfs","pyDKB/pyDKB.common.json_utils","pyDKB/pyDKB.common.misc","pyDKB/pyDKB.common.types","pyDKB/pyDKB.dataflow","pyDKB/pyDKB.dataflow.cds","pyDKB/pyDKB.dataflow.communication","pyDKB/pyDKB.dataflow.communication.consumer","pyDKB/pyDKB.dataflow.communication.consumer.Consumer","pyDKB/pyDKB.dataflow.communication.consumer.FileConsumer","pyDKB/pyDKB.dataflow.communication.consumer.HDFSConsumer","pyDKB/pyDKB.dataflow.communication.consumer.StreamConsumer","pyDKB/pyDKB.dataflow.communication.messages","pyDKB/pyDKB.dataflow.communication.producer","pyDKB/pyDKB.dataflow.communication.producer.FileProducer","pyDKB/pyDKB.dataflow.communication.producer.HDFSProducer","pyDKB/pyDKB.dataflow.communication.producer.Producer","pyDKB/pyDKB.dataflow.communication.producer.StreamProducer","pyDKB/pyDKB.dataflow.communication.stream","pyDKB/pyDKB.dataflow.communication.stream.InputStream","pyDKB/pyDKB.dataflow.communication.stream.OutputStream","pyDKB/pyDKB.dataflow.communication.stream.Stream","pyDKB/pyDKB.dataflow.communication.stream.exceptions","pyDKB/pyDKB.dataflow.dkbID","pyDKB/pyDKB.dataflow.exceptions","pyDKB/pyDKB.dataflow.stage","pyDKB/pyDKB.dataflow.stage.AbstractStage","pyDKB/pyDKB.dataflow.stage.ProcessorStage","pyDKB/pyDKB.dataflow.types"],envversion:52,filenames:["Stages/055_documents2TTL/055_documents2TTL.rst","Stages/Stages.rst","index.rst","pyDKB/pyDKB.rst","pyDKB/pyDKB.common.rst","pyDKB/pyDKB.common.LoggableObject.rst","pyDKB/pyDKB.common.Type.rst","pyDKB/pyDKB.common.custom_readline.rst","pyDKB/pyDKB.common.exceptions.rst","pyDKB/pyDKB.common.hdfs.rst","pyDKB/pyDKB.common.json_utils.rst","pyDKB/pyDKB.common.misc.rst","pyDKB/pyDKB.common.types.rst","pyDKB/pyDKB.dataflow.rst","pyDKB/pyDKB.dataflow.cds.rst","pyDKB/pyDKB.dataflow.communication.rst","pyDKB/pyDKB.dataflow.communication.consumer.rst","pyDKB/pyDKB.dataflow.communication.consumer.Consumer.rst","pyDKB/pyDKB.dataflow.communication.consumer.FileConsumer.rst","pyDKB/pyDKB.dataflow.communication.consumer.HDFSConsumer.rst","pyDKB/pyDKB.dataflow.communication.consumer.StreamConsumer.rst","pyDKB/pyDKB.dataflow.communication.messages.rst","pyDKB/pyDKB.dataflow.communication.producer.rst","pyDKB/pyDKB.dataflow.communication.producer.FileProducer.rst","pyDKB/pyDKB.dataflow.communication.producer.HDFSProducer.rst","pyDKB/pyDKB.dataflow.communication.producer.Producer.rst","pyDKB/pyDKB.dataflow.communication.producer.StreamProducer.rst","pyDKB/pyDKB.dataflow.communication.stream.rst","pyDKB/pyDKB.dataflow.communication.stream.InputStream.rst","pyDKB/pyDKB.dataflow.communication.stream.OutputStream.rst","pyDKB/pyDKB.dataflow.communication.stream.Stream.rst","pyDKB/pyDKB.dataflow.communication.stream.exceptions.rst","pyDKB/pyDKB.dataflow.dkbID.rst","pyDKB/pyDKB.dataflow.exceptions.rst","pyDKB/pyDKB.dataflow.stage.rst","pyDKB/pyDKB.dataflow.stage.AbstractStage.rst","pyDKB/pyDKB.dataflow.stage.ProcessorStage.rst","pyDKB/pyDKB.dataflow.types.rst"],objects:{"":{pyDKB:[3,0,0,"-"]},"055_documents2TTL":{documents2ttl:[0,0,0,"-"]},"055_documents2TTL.documents2ttl":{abstract_extraction:[0,1,1,""],arxiv_extraction:[0,1,1,""],cds_id_extraction:[0,1,1,""],cds_internal_extraction:[0,1,1,""],cds_parameter_extraction:[0,1,1,""],creation_date_extraction:[0,1,1,""],define_globals:[0,1,1,""],document_cds:[0,1,1,""],document_glance:[0,1,1,""],document_links:[0,1,1,""],doi2ttl:[0,1,1,""],fix_list_values:[0,1,1,""],fix_string:[0,1,1,""],generate_journal_id:[0,1,1,""],get_document_iri:[0,1,1,""],glance_parameter_extraction:[0,1,1,""],keywords2ttl:[0,1,1,""],main:[0,1,1,""],process:[0,1,1,""],process_journals:[0,1,1,""],report_number_extraction:[0,1,1,""],title_extraction:[0,1,1,""]},"pyDKB.common":{LoggableObject:[5,0,0,"-"],Type:[6,0,0,"-"],custom_readline:[7,0,0,"-"],exceptions:[8,0,0,"-"],hdfs:[9,0,0,"-"],json_utils:[10,0,0,"-"],misc:[11,0,0,"-"],types:[12,0,0,"-"]},"pyDKB.common.LoggableObject":{LoggableObject:[5,2,1,""]},"pyDKB.common.LoggableObject.LoggableObject":{log:[5,3,1,""]},"pyDKB.common.Type":{Type:[6,2,1,""]},"pyDKB.common.Type.Type":{add:[6,4,1,""],hasMember:[6,4,1,""],member:[6,4,1,""],memberName:[6,4,1,""]},"pyDKB.common.custom_readline":{custom_readline:[7,1,1,""]},"pyDKB.common.exceptions":{HDFSException:[8,5,1,""]},"pyDKB.common.hdfs":{File:[9,1,1,""],basename:[9,1,1,""],check_stderr:[9,1,1,""],dirname:[9,1,1,""],getfile:[9,1,1,""],join:[9,1,1,""],listdir:[9,1,1,""],makedirs:[9,1,1,""],movefile:[9,1,1,""],putfile:[9,1,1,""]},"pyDKB.common.json_utils":{nestedKeys:[10,1,1,""],valueByKey:[10,1,1,""]},"pyDKB.common.misc":{log:[11,1,1,""]},"pyDKB.dataflow":{cds:[14,0,0,"-"],communication:[15,0,0,"-"],dkbID:[32,0,0,"-"],exceptions:[33,0,0,"-"],stage:[34,0,0,"-"],types:[37,0,0,"-"]},"pyDKB.dataflow.cds":{CDSInvenioConnector:[14,2,1,""],KerberizedCDSInvenioConnector:[14,2,1,""]},"pyDKB.dataflow.cds.CDSInvenioConnector":{"delete":[14,4,1,""],handlers:[14,6,1,""],kill:[14,4,1,""],orig_handlers:[14,6,1,""]},"pyDKB.dataflow.communication":{Message:[15,1,1,""],consumer:[16,0,0,"-"],messages:[21,0,0,"-"],producer:[22,0,0,"-"],stream:[27,0,0,"-"]},"pyDKB.dataflow.communication.consumer":{Consumer:[17,0,0,"-"],ConsumerBuilder:[16,2,1,""],FileConsumer:[18,0,0,"-"],HDFSConsumer:[19,0,0,"-"],StreamConsumer:[20,0,0,"-"]},"pyDKB.dataflow.communication.consumer.Consumer":{Consumer:[17,2,1,""],ConsumerException:[17,5,1,""]},"pyDKB.dataflow.communication.consumer.Consumer.Consumer":{close:[17,4,1,""],config:[17,6,1,""],get_message:[17,4,1,""],get_source_info:[17,4,1,""],get_stream:[17,4,1,""],init_stream:[17,4,1,""],message_class:[17,4,1,""],message_type:[17,6,1,""],next:[17,4,1,""],reconfigure:[17,4,1,""],reset_stream:[17,4,1,""],set_message_type:[17,4,1,""],stream_is_readable:[17,4,1,""]},"pyDKB.dataflow.communication.consumer.ConsumerBuilder":{build:[16,4,1,""],consumerClass:[16,6,1,""],setSource:[16,4,1,""],setType:[16,4,1,""]},"pyDKB.dataflow.communication.consumer.FileConsumer":{FileConsumer:[18,2,1,""]},"pyDKB.dataflow.communication.consumer.FileConsumer.FileConsumer":{current_file:[18,6,1,""],get_source:[18,4,1,""],get_source_info:[18,4,1,""],init_sources:[18,4,1,""],next_source:[18,4,1,""],reconfigure:[18,4,1,""],source_is_readable:[18,4,1,""]},"pyDKB.dataflow.communication.consumer.HDFSConsumer":{HDFSConsumer:[19,2,1,""]},"pyDKB.dataflow.communication.consumer.HDFSConsumer.HDFSConsumer":{reconfigure:[19,4,1,""]},"pyDKB.dataflow.communication.consumer.StreamConsumer":{StreamConsumer:[20,2,1,""]},"pyDKB.dataflow.communication.consumer.StreamConsumer.StreamConsumer":{fd:[20,6,1,""],get_source:[20,4,1,""],get_source_info:[20,4,1,""],next_source:[20,4,1,""],reconfigure:[20,4,1,""]},"pyDKB.dataflow.communication.messages":{AbstractMessage:[21,2,1,""],DecodeUnknownType:[21,5,1,""],EncodeUnknownType:[21,5,1,""],JSONMessage:[21,2,1,""],Message:[21,1,1,""],TTLMessage:[21,2,1,""]},"pyDKB.dataflow.communication.messages.AbstractMessage":{content:[21,4,1,""],decode:[21,4,1,""],decoded:[21,6,1,""],encode:[21,4,1,""],encoded:[21,6,1,""],extension:[21,3,1,""],getOriginal:[21,4,1,""],incompl:[21,6,1,""],incomplete:[21,4,1,""],msg_type:[21,6,1,""],native_types:[21,6,1,""],typeName:[21,3,1,""]},"pyDKB.dataflow.communication.messages.JSONMessage":{decode:[21,4,1,""],encode:[21,4,1,""],incompl_key:[21,6,1,""],incomplete:[21,4,1,""],msg_type:[21,6,1,""],native_types:[21,6,1,""]},"pyDKB.dataflow.communication.messages.TTLMessage":{decode:[21,4,1,""],encode:[21,4,1,""],msg_type:[21,6,1,""],native_types:[21,6,1,""]},"pyDKB.dataflow.communication.producer":{FileProducer:[23,0,0,"-"],HDFSProducer:[24,0,0,"-"],Producer:[25,0,0,"-"],ProducerBuilder:[22,2,1,""],StreamProducer:[26,0,0,"-"]},"pyDKB.dataflow.communication.producer.FileProducer":{FileProducer:[23,2,1,""]},"pyDKB.dataflow.communication.producer.FileProducer.FileProducer":{close:[23,4,1,""],close_file:[23,4,1,""],config_dir:[23,4,1,""],current_file:[23,6,1,""],default_dir:[23,4,1,""],dirname:[23,4,1,""],ensure_dir:[23,4,1,""],file_info:[23,4,1,""],get_dest:[23,4,1,""],get_dest_info:[23,4,1,""],get_dir:[23,4,1,""],get_filename:[23,4,1,""],get_source_info:[23,4,1,""],reconfigure:[23,4,1,""],reset_file:[23,4,1,""],set_default_dir:[23,4,1,""],subdir:[23,4,1,""]},"pyDKB.dataflow.communication.producer.HDFSProducer":{HDFSProducer:[24,2,1,""]},"pyDKB.dataflow.communication.producer.HDFSProducer.HDFSProducer":{close_file:[24,4,1,""],config_dir:[24,4,1,""],ensure_dir:[24,4,1,""],file_info:[24,4,1,""],set_default_dir:[24,4,1,""],subdir:[24,4,1,""]},"pyDKB.dataflow.communication.producer.Producer":{Producer:[25,2,1,""],ProducerException:[25,5,1,""]},"pyDKB.dataflow.communication.producer.Producer.Producer":{close:[25,4,1,""],config:[25,6,1,""],drop:[25,4,1,""],eop:[25,4,1,""],flush:[25,4,1,""],get_dest:[25,4,1,""],get_dest_info:[25,4,1,""],get_stream:[25,4,1,""],init_stream:[25,4,1,""],message_class:[25,4,1,""],message_type:[25,6,1,""],reconfigure:[25,4,1,""],reset_stream:[25,4,1,""],set_message_type:[25,4,1,""],write:[25,4,1,""]},"pyDKB.dataflow.communication.producer.ProducerBuilder":{build:[22,4,1,""],message_type:[22,6,1,""],producerClass:[22,6,1,""],setDest:[22,4,1,""],setSourceInfoMethod:[22,4,1,""],setType:[22,4,1,""],src_info:[22,6,1,""]},"pyDKB.dataflow.communication.producer.StreamProducer":{StreamProducer:[26,2,1,""]},"pyDKB.dataflow.communication.producer.StreamProducer.StreamProducer":{fd:[26,6,1,""],get_dest:[26,4,1,""],get_dest_info:[26,4,1,""],reconfigure:[26,4,1,""]},"pyDKB.dataflow.communication.stream":{InputStream:[28,0,0,"-"],OutputStream:[29,0,0,"-"],Stream:[30,0,0,"-"],StreamBuilder:[27,2,1,""],exceptions:[31,0,0,"-"]},"pyDKB.dataflow.communication.stream.InputStream":{InputStream:[28,2,1,""],get_message:[27,4,1,""],is_readable:[27,4,1,""],next:[27,4,1,""],parse_message:[27,4,1,""],reset:[27,4,1,""]},"pyDKB.dataflow.communication.stream.InputStream.InputStream":{get_message:[28,4,1,""],is_readable:[28,4,1,""],next:[28,4,1,""],parse_message:[28,4,1,""],reset:[28,4,1,""]},"pyDKB.dataflow.communication.stream.OutputStream":{OutputStream:[29,2,1,""],configure:[27,4,1,""],drop:[27,4,1,""],eop:[27,4,1,""],flush:[27,4,1,""],msg_buffer:[27,6,1,""],write:[27,4,1,""]},"pyDKB.dataflow.communication.stream.OutputStream.OutputStream":{configure:[29,4,1,""],drop:[29,4,1,""],eop:[29,4,1,""],flush:[29,4,1,""],msg_buffer:[29,6,1,""],write:[29,4,1,""]},"pyDKB.dataflow.communication.stream.Stream":{EOM:[27,6,1,""],Stream:[30,2,1,""],close:[27,4,1,""],configure:[27,4,1,""],get_fd:[27,4,1,""],message_type:[27,4,1,""],reset:[27,4,1,""],set_message_type:[27,4,1,""]},"pyDKB.dataflow.communication.stream.Stream.Stream":{EOM:[30,6,1,""],close:[30,4,1,""],configure:[30,4,1,""],get_fd:[30,4,1,""],message_type:[30,4,1,""],reset:[30,4,1,""],set_message_type:[30,4,1,""]},"pyDKB.dataflow.communication.stream.StreamBuilder":{build:[27,4,1,""],message_type:[27,6,1,""],setStream:[27,4,1,""],setType:[27,4,1,""],streamClass:[27,6,1,""]},"pyDKB.dataflow.communication.stream.exceptions":{StreamException:[31,5,1,""]},"pyDKB.dataflow.dkbID":{dkbID:[32,1,1,""]},"pyDKB.dataflow.exceptions":{DataflowException:[33,5,1,""]},"pyDKB.dataflow.stage":{AbstractStage:[35,0,0,"-"],ProcessorStage:[36,0,0,"-"]},"pyDKB.dataflow.stage.AbstractStage":{AbstractStage:[35,2,1,""]},"pyDKB.dataflow.stage.AbstractStage.AbstractStage":{add_argument:[35,4,1,""],args_error:[35,4,1,""],config_error:[35,4,1,""],defaultArguments:[35,4,1,""],log_configuration:[35,4,1,""],output_error:[35,4,1,""],parse_args:[35,4,1,""],print_usage:[35,4,1,""],read_config:[35,4,1,""],run:[35,4,1,""],set_error:[35,4,1,""],stop:[35,4,1,""]},"pyDKB.dataflow.stage.ProcessorStage":{ProcessorStage:[36,2,1,""],clear_buffer:[34,4,1,""],configure:[34,4,1,""],defaultArguments:[34,4,1,""],flush_buffer:[34,4,1,""],forward:[34,4,1,""],get_source_info:[34,4,1,""],input:[34,4,1,""],input_message_class:[34,4,1,""],output:[34,4,1,""],output_message_class:[34,4,1,""],process:[34,7,1,""],run:[34,4,1,""],set_input_message_type:[34,4,1,""],set_output_message_type:[34,4,1,""],skip_process:[34,7,1,""],stop:[34,4,1,""]},"pyDKB.dataflow.stage.ProcessorStage.ProcessorStage":{clear_buffer:[36,4,1,""],configure:[36,4,1,""],defaultArguments:[36,4,1,""],flush_buffer:[36,4,1,""],forward:[36,4,1,""],get_source_info:[36,4,1,""],input:[36,4,1,""],input_message_class:[36,4,1,""],output:[36,4,1,""],output_message_class:[36,4,1,""],process:[36,7,1,""],run:[36,4,1,""],set_input_message_type:[36,4,1,""],set_output_message_type:[36,4,1,""],skip_process:[36,7,1,""],stop:[36,4,1,""]},pyDKB:{common:[4,0,0,"-"],dataflow:[13,0,0,"-"]}},objnames:{"0":["py","module","Python module"],"1":["py","function","Python function"],"2":["py","class","Python class"],"3":["py","classmethod","Python class method"],"4":["py","method","Python method"],"5":["py","exception","Python exception"],"6":["py","attribute","Python attribute"],"7":["py","staticmethod","Python static method"]},objtypes:{"0":"py:module","1":"py:function","2":"py:class","3":"py:classmethod","4":"py:method","5":"py:exception","6":"py:attribute","7":"py:staticmethod"},terms:{"055_documents2ttl":0,"abstract":[0,6,21,27,30,34,35,36],"boolean":3,"case":[11,14,34,35,36],"class":[5,6,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,34,35,36],"default":[3,23,24,34,36],"final":[34,36],"function":[0,3,7,11,34,36],"import":[3,7],"int":[0,6],"new":[3,17,18,21,25,27,28],"return":[0,3,6,7,9,10,15,16,17,18,20,21,22,23,24,25,26,27,28,30,32,34,35,36],"static":[18,23,24,34,36],"true":[0,3,6,7,14,17,18,25,27,28,30,34,35,36],"try":[9,25],CDS:0,For:[3,21],That:3,The:[0,34,36],There:3,__config:35,__input:[34,36],__iter:[27,28],__main__:3,__name__:3,__output:[34,36],__parser:35,__stoppabl:[34,36],_cds_attr:0,_err:35,_incomplet:21,_message_typ:3,about:[3,14,18,20,23,24,26,27,29,34,36],abstract_extract:0,abstractmessag:[21,34,36],abstractprocessorstag:0,abstractstag:[3,13,34,36],accept:0,accord:[3,19,23,34,36],achiev:[34,36],across:12,action:[34,36],actual:[7,25],add:[3,6,27,29,35],add_argu:35,added:6,addit:[11,21,34,36],all:[3,5,9,11,12,17,34,35,36],allow:[14,34,36],also:3,ancestor:5,anew:[34,36],ani:[0,3,11,23],anoth:7,anywher:[27,29],appl:6,appli:0,applic:3,apreduc:36,arg:[0,6,9,11,14,34,35,36],argpars:[0,35],args_error:35,argument:[0,3,6,17,25,34,35,36],argumentpars:35,argv:[0,3],arxiv:0,arxiv_extract:0,atla:0,author:0,avoid:[3,7],awar:14,base:[0,3,5,6,8,14,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36],base_dir:[23,24],basenam:9,believ:20,belong:11,better:[18,23,24],between:[10,11],bodi:11,bool:[0,6,17,18,21,27,28,34,36],browser:14,buffer:[25,27,29,34,36],build:[16,22,27],call:[0,3,7,34,36],can:[3,7,36],cds:[3,13],cds_attr:0,cds_id_extract:0,cds_internal_extract:0,cds_parameter_extract:0,cdsinvenioconnector:14,chain:10,chang:[34,36],check:[3,6,7,17,18,21,27,28],check_stderr:9,citi:3,classmethod:[5,21],clear_buff:[34,36],close:[14,17,23,24,25,27,28,30],close_fil:[23,24],cls:21,code:[0,3,9,21,35],com:7,combin:3,command:[0,3,34,36],common:[3,17,18,23,24,25,27,30,35],commuic:30,commun:[3,7,13,34,36],compat:3,config:[16,17,18,19,20,22,23,24,25,26,27,28,29,30,34,35,36],config_dir:[23,24],config_error:35,configpars:35,configur:[3,18,19,20,23,24,26,27,29,30,34,35,36],confirm:21,connect:0,connector:3,consider:0,construct:[0,7,16,22,23,24],constructor:[6,16,22,27],consum:[3,13,15,34,36],consumerbuild:16,consumerclass:16,consumerexcept:17,contain:[0,10,11],containspubl:0,content:[0,2,3,21],context:3,contrib:14,convert:0,copi:9,correspond:[3,21,23],creat:[3,9,27],creation:[0,3],creation_date_extract:0,current:[0,3,9,17,18,20,21,22,23,24,25,26,27,28,34,36],current_fil:[18,23],currenti:20,custom:[7,35],custom_readlin:[3,4],data:[0,3,7,16,17,18,19,20,21,22,23,24,25,26,32,34,36],data_typ:32,dataflow:[0,3,7],dataflowexcept:[17,25,31,33],date:[0,3],decid:3,decod:[3,21],decodeunknowntyp:21,def:3,default_dir:23,defaultargu:[34,35,36],defaultdict:35,defin:[0,18],define_glob:0,definit:[6,8,12,21,33,34,35,36,37],del:14,delet:14,delimet:7,depend:35,descript:[34,35,36],descriptor:[18,20,23,26,27,28,30,34,36],dest:[3,9,22,23,24,25,26,36],destin:[3,22,23,25],detail:3,develop:3,dfs:36,dict:[0,3,10,21],dir:[23,36],directori:[9,23,24,36],dirnam:[9,23],dkb:[0,3,7,32,33,34,35,36],dkbid:[0,3,13],doc_id:0,doc_iri:0,document:[0,3],document_cd:0,document_gl:0,document_link:0,documents2ttl:0,doe:6,doesn:0,doi2ttl:0,doi:0,dot:10,download:9,drop:[3,25,27,29,34,36],each:0,easi:3,elif:6,els:25,empti:[11,17,18,27,28],encod:[3,21],encodeunknowntyp:21,end:[9,27,29],ensur:[23,24],ensure_dir:[23,24],eom:[27,30],eop:[25,27,29],eopmark:[34,36],equival:36,err_trac:35,err_typ:35,err_val:35,error:[7,35],escap:0,etc:[20,26,27,30,34,36],etl:3,even:[20,26],everi:[3,7,34,36],exampl:[0,3,6],exc_info:35,except:[3,4,13,15,17,21,25,27,30],execut:3,exist:[6,23,24],exit:[34,35,36],expect:[34,36],extend:14,extens:21,extract:[0,3],fail:[3,17,27,28,34,35,36],failur:[34,36],fals:[3,6,14,17,18,27,28,34,35,36],fcntl:7,field:[21,34,36],file:[0,3,7,9,16,18,20,21,22,23,24,26,27,28,30,34,35,36],file_info:[23,24],fileconsum:[3,13,15,16,19,20],filenam:[9,23],fileproduc:[3,13,15,22,24,26],filter:3,finish:[34,36],first:[3,11,34,36],fix:0,fix_list_valu:0,fix_str:0,flow:[3,7],flush:[25,27,29,34,36],flush_buff:[34,36],fname:9,follow:[0,3],forc:[27,28],formal:21,format:[3,21,34,36],forward:[34,36],found:[0,7],frame:14,from:[0,3,7,9,17,18,21,23,24,27,28,34,35,36],full:[3,23,24],fulli:[3,18],further:0,futur:3,gener:[0,3,7,32,34,36],generate_journal_id:0,get:[0,3,9,17,18,20,21,22,23,25,26,27,28,30,34,36],get_dest:[23,25,26],get_dest_info:[23,25,26],get_dir:23,get_document_iri:0,get_fd:[27,30],get_filenam:23,get_messag:[17,27,28],get_sourc:[18,20],get_source_info:[17,18,20,23,34,36],get_stream:[17,25],getfil:9,getorigin:21,github:7,given:[5,9,11],glanc:0,glance_attr:0,glance_parameter_extract:0,global:0,graph:0,gssapi:14,guid:2,handl:7,handler:[3,14],has:0,hasabstract:0,hasarxivcod:0,hascds_id:0,hascdsintern:0,hascdsreportnumb:0,hascreationd:0,hasdoi:0,hasfulltitl:0,hasglance_id:0,hash:23,haskeyword:0,haslabel:0,hasmemb:6,hasrefcod:0,hasshorttitl:0,hastitl:0,hasurl:0,hasvolum:0,hasyear:0,have:[0,3],hdf:[3,4,8,18,19,24,36],hdfsconsum:[3,13,15,16],hdfsexcept:8,hdfsproduc:[3,13,15,22],hook:0,http:7,identifi:32,ignor:11,ile:36,iles:36,implement:[0,3,7,17,18,19,20,21,23,24,25,26,27,28,29,34,36],includ:23,incompl:21,incompl_kei:21,incomplet:[7,21,34,36],independ:3,index:2,indic:3,individu:[34,36],info:[17,18,20,22,23,25,26,35],inform:[3,34,35,36],init:[16,17,22,25,34],init_sourc:18,init_stream:[17,25],initi:[0,17,18,25,27,28],input:[0,3,7,17,20,25,27,28,30,34,36],input_data:3,input_messag:[34,36],input_message_class:[34,36],inputstream:[3,13,15,17,27],insid:[34,36],instanc:[0,3,16,22,27,29,34,35,36],instead:7,integr:3,intend:3,interact:9,intern:[0,3],interrupt:7,invenio_cli:14,iobas:[27,30],iri:0,is_read:[27,28],isbasedon:0,issu:0,item:0,iter:[7,18,34,36],its:[0,9,34,36],itself:10,jame:3,john:3,join:9,jonathan:3,journal:0,journal_dict:0,journal_issu:0,journalissu:0,json:[0,3,10,21],json_data:[0,10,32],json_util:[3,4],jsonmessag:[3,21],jsonprocessorstag:3,just:[34,36],kafka:3,keep:[3,34,36],kei:[3,10],kerberizedcdsinvenioconnector:14,kerbero:14,keyword:0,keywords2ttl:0,kill:14,knowledg:3,known:25,kwarg:35,languag:3,last:[7,25,35],left:[7,17,18,27,28],let:3,level:[5,11],librari:[3,7,12,37],like:[7,20,26,34,36],line:[0,3,7,11,34,36],link:[0,17,25],list:[0,3,9,10,11,34,36],list_val:0,listdir:9,load:3,local:[9,23,36],local_path:23,log:[5,11,35],log_configur:35,loggableobject:[3,4,17,25,27,30,35],logic:3,login:14,loglevel:[5,11],look:0,mai:[7,34,36],main:0,make:7,makedir:9,manag:3,mani:3,mark:[10,34,36],marker:[21,25],matter:21,max_lin:9,mean:[0,3],meaning:32,mechan:14,member:[5,6,11],membernam:6,merg:[20,26],messag:[0,3,5,7,11,13,15,16,17,22,25,27,28,29,30,34,35,36],message_class:[17,25],message_typ:[17,22,25,27,30],messagetyp:3,metadata:[0,3,23,24],method:[5,7,22,27,28,34,36],middl:7,mint:3,misc:[3,4],miscellani:11,mode:[3,9,34,35,36],modul:[0,2,3,4,13,15,16,22,27,34],more:7,most:14,move:[7,9,24],movefil:9,msg:[0,25],msg_buffer:[27,29],msg_type:[15,21,27,30],multilin:11,multipl:[20,23,24,26],must:[3,10,34,36],my_process:3,mytyp:6,name:[0,3,6,7,9,20,21,23,24,26,36],namespac:[0,35],native_typ:21,ndjson:3,nearest:18,necessari:[34,36],need:[3,5,35],negoti:14,nest:10,nestedkei:10,newlin:7,next:[7,11,17,18,27,28],next_sourc:[18,20],node:3,non:[0,18],none:[0,9,16,17,18,20,21,22,23,25,26,27,28,29,30,34,35,36],nonetyp:[0,17,18,21,27,28],note:[34,36],note_cds_attr:0,note_glance_attr:0,noth:7,notimplementederror:21,now:3,number:[0,20,26],object:[0,3,5,6,7,9,10,11,16,17,21,22,27,28,30,32,34,35,36,37],onc:3,one:[7,14,20,23],ones:3,onli:[3,9,11,20],ontolog:0,open:[9,17,23,25,27,30,35],oper:[3,27,30,31],option:[7,36],orang:6,organ:13,orig_handl:14,origin:21,other:[0,11],our:3,out:3,out_data:3,out_messag:3,output:[0,3,5,9,11,23,24,25,27,29,30,34,35,36],output_error:35,output_messag:[34,36],output_message_class:[34,36],outputstream:[3,13,15,27],over:7,overrid:[27,28],packag:2,pandawm:7,paper:0,paper_cds_attr:0,paper_glance_attr:0,parallel:[23,24],param_nam:0,paramet:[0,5,6,7,9,10,11,19,21,25,27,30,34,35,36],parent:[9,27,28],pars:[0,3,17,27,28,34,35,36],parse_arg:35,parse_messag:[27,28],parser:[34,35,36],part:[0,3,34,36],pass:[3,6,15,21,35],password:14,path:[9,23,24],perform:[3,34,36],pipe:[18,20,23,24,26],pipelin:3,pleas:3,plum:6,point:[18,23,24],popul:3,posit:6,prefer:23,prefix:11,prehap:[20,26],prepar:[34,36],prepend:11,previou:[3,21,27,30],print:[6,35],print_usag:35,proc:9,process:[0,3,27,29,34,35,36],process_journ:0,processor:[3,34,36],processorstag:[0,3,13,34],produc:[3,13,15],producerbuild:22,producerclass:22,producerexcept:25,program:3,propag:14,properti:[27,28],protect:14,provid:11,pull:7,put:[10,25,34,36],putfil:9,pydkb:[0,2],python:3,quickstart:2,quotat:10,rais:[7,21,27,30],reach:3,read:[7,17,18,35],read_config:35,readabl:[7,17,18,27,28],readi:3,readlin:7,real:25,receiv:0,reconfigur:[17,18,19,20,23,25,26],record:11,rel:36,remov:23,replac:11,report:0,report_number_extract:0,repres:[11,14],requir:[34,36],reset:[17,18,21,23,25,27,28,30],reset_fil:23,reset_stream:[17,25],restore_handl:14,result:0,rethink:7,retriev:6,retrun:[27,28],rewritten:36,right:3,robin:18,role:3,round:18,run:[0,3,14,34,35,36],runtimeerror:8,safeconfigpars:35,same:[11,14],sampl:3,seamless:[34,36],search:[2,10],section:3,semant:[34,36],send:[7,9,27,29,34,36],sentenc:0,separ:[7,10],sequenc:0,set:[3,16,17,21,22,23,24,25,27,30,34,35,36],set_:3,set_default_dir:[23,24],set_error:35,set_input_message_typ:[3,34,36],set_message_typ:[17,25,27,30],set_output_message_typ:[3,34,36],setdest:22,setsourc:16,setsourceinfomethod:22,setstream:27,settyp:[16,22,27],should:[0,3],signal:[14,27,29],signum:14,simpl:[3,14],simpliest:[34,36],simplifi:3,singl:[3,20,21,26,34,36],sink:3,sketchi:3,skip:[34,36],skip_process:[34,36],smallest:3,smith:3,some:[0,21,34,36],sourc:[0,3,5,6,7,8,9,10,11,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36],source_is_read:18,special:11,specif:[0,21,35],specifi:[34,36],spnego:14,src_info:22,stage:[2,3,13,17,18,25],standalon:3,start:3,statement:[3,21],statu:21,stderr:[9,35],step:3,still:0,stop:[34,35,36],stopiter:7,storag:3,str:[0,5,6,7,21,35],stream:[3,13,15,17,20,25,26,34,36],stream_is_read:17,streambuild:27,streamclass:27,streamconsum:[3,13,15,16,18],streamexcept:31,streamproduc:[3,13,15,22,23,24],strict:7,string:[0,6,10,11,21],structur:0,sub_dir:[23,24],subdir:[23,24],subdirectori:9,submodul:[3,7],subpackag:2,subprocess:9,success:[3,34,36],successfulli:[34,36],supervis:3,supervisor:[3,27,29],support:[0,3],supporting_docu:0,supporting_not:0,supportingdocu:0,symbol:11,sys:3,system:3,take:[3,21,34,36],taken:[3,36],task:0,tdin:36,tdout:[3,36],temporari:[9,23],temporaryfil:9,term:3,text:21,thei:11,them:3,thi:[0,3,7,14,20,21,26,34,36],think:[18,20,23,24,26],three:3,through:3,thrown:21,till:9,time:3,timeout:9,timestamp:[11,23],titl:0,title_extract:0,todo:[0,18,20,21,23,24,26,27,30],topolog:3,traceback:35,transform:[3,10,34,36],tream:[34,36],ttl:[0,21],ttlmessag:21,turn:[3,34,36],two:3,type:[0,3,4,5,7,11,13,15,16,17,18,21,22,25,27,28,30,32,34,36],typenam:21,unbroken:[34,36],unchang:0,under:[0,7],unicod:[0,21],uniqu:32,unit:3,until:7,updat:18,upload:9,usag:[0,35,36],use:25,used:[3,7,12],user:[3,14],util:[9,10,11,13,32],val:6,valu:[0,3,6,7,9,10,17,18,21,27,28,34,36],valuebykei:10,valueerror:21,variabl:[0,34,35,36],verifi:[27,28],version:3,via:[0,14],volum:0,wai:[34,36],wait:9,walnut:6,welcom:3,were:3,what:7,when:[7,21],where:[15,21,36],which:[3,14],whitespac:11,window:7,wip:3,without:[3,7,9,25,27,29,34,36],work:[3,10],worker:3,workflow:0,write:[3,7,25,27,29],written:3,wrong:6,wrong_str:0,xxx:[15,21],xxxmessag:[15,21],year:0,yet:[18,32],yield:7,york:3,your:3},titles:["Stage 055","Stages","Welcome to Data Knowledge Base documentation pages","pyDKB package","pyDKB.common package","pyDKB.common.LoggableObject module","pyDKB.common.Type module","pyDKB.common.custom_readline module","pyDKB.common.exceptions module","pyDKB.common.hdfs module","pyDKB.common.json_utils module","pyDKB.common.misc module","pyDKB.common.types module","pyDKB.dataflow package","pyDKB.dataflow.cds module","pyDKB.dataflow.communication package","pyDKB.dataflow.communication.consumer package","pyDKB.dataflow.communication.consumer.Consumer module","pyDKB.dataflow.communication.consumer.FileConsumer module","pyDKB.dataflow.communication.consumer.HDFSConsumer module","pyDKB.dataflow.communication.consumer.StreamConsumer module","pyDKB.dataflow.communication.messages module","pyDKB.dataflow.communication.producer package","pyDKB.dataflow.communication.producer.FileProducer module","pyDKB.dataflow.communication.producer.HDFSProducer module","pyDKB.dataflow.communication.producer.Producer module","pyDKB.dataflow.communication.producer.StreamProducer module","pyDKB.dataflow.communication.stream package","pyDKB.dataflow.communication.stream.InputStream module","pyDKB.dataflow.communication.stream.OutputStream module","pyDKB.dataflow.communication.stream.Stream module","pyDKB.dataflow.communication.stream.exceptions module","pyDKB.dataflow.dkbID module","pyDKB.dataflow.exceptions module","pyDKB.dataflow.stage package","pyDKB.dataflow.stage.AbstractStage module","pyDKB.dataflow.stage.ProcessorStage module","pyDKB.dataflow.types module"],titleterms:{abstractstag:35,base:2,cds:14,common:[4,5,6,7,8,9,10,11,12],commun:[15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],consum:[16,17,18,19,20],custom_readlin:7,data:2,dataflow:[13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37],dkbid:32,document:2,except:[8,31,33],fileconsum:18,fileproduc:23,guid:3,hdf:9,hdfsconsum:19,hdfsproduc:24,indic:2,inputstream:28,json_util:10,knowledg:2,loggableobject:5,messag:21,misc:11,modul:[5,6,7,8,9,10,11,12,14,17,18,19,20,21,23,24,25,26,28,29,30,31,32,33,35,36,37],outputstream:29,packag:[3,4,13,15,16,22,27,34],page:2,processorstag:36,produc:[22,23,24,25,26],pydkb:[3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37],quickstart:3,stage:[0,1,34,35,36],stream:[27,28,29,30,31],streamconsum:20,streamproduc:26,submodul:[4,13,15,16,22,27,34],subpackag:[3,13,15],tabl:2,todo:7,type:[6,12,37],welcom:2}}) \ No newline at end of file diff --git a/Docs/build/pdf/DKB.pdf b/Docs/build/pdf/DKB.pdf index d0886a365..841193f9c 100644 Binary files a/Docs/build/pdf/DKB.pdf and b/Docs/build/pdf/DKB.pdf differ diff --git a/Utils/Dataflow/055_documents2TTL/documents2ttl.py b/Utils/Dataflow/055_documents2TTL/documents2ttl.py index d782433b6..4e51137e1 100755 --- a/Utils/Dataflow/055_documents2TTL/documents2ttl.py +++ b/Utils/Dataflow/055_documents2TTL/documents2ttl.py @@ -290,8 +290,8 @@ def cds_internal_extraction(data): :param data: 'CDS' part of the initial JSON :type data: dict - :return: CDS internal report number - :rtype: unicode + :return: CDS internal report number or None if it was not found + :rtype: unicode or NoneType """ if 'report_number' in data: report_number = data.get('report_number') @@ -313,8 +313,8 @@ def report_number_extraction(data): :param data: 'CDS' part of the initial JSON :type data: dict - :return: report number - :rtype: unicode + :return: report number or None if it was not found + :rtype: unicode or NoneType """ if 'report_number' in data: report_number = data.get('report_number') @@ -335,8 +335,8 @@ def glance_parameter_extraction(param_name, json_data): :param json_data: 'GLANCE' part of the initial JSON :type json_data: dict - :return: parameter value - :rtype: str, unicode + :return: parameter value or None if it was not found + :rtype: str, unicode, NoneType """ if param_name == 'id': return json_data['id'] @@ -359,8 +359,8 @@ def cds_parameter_extraction(param_name, json_data): :type param_name: str :param json_data: 'CDS' part of the initial JSON - :return: parameter value - :rtype: int, str + :return: parameter value or None if it was not found + :rtype: int, str, NoneType """ if param_name == 'abstract': return abstract_extraction(json_data) @@ -384,8 +384,8 @@ def abstract_extraction(data): :param data: 'CDS' part of the initial JSON :type data: dict - :return: abstract - :rtype: str + :return: abstract or None if it was not found + :rtype: str or NoneType """ result = None if 'abstract' in data: @@ -408,8 +408,8 @@ def title_extraction(data): :param data: 'CDS' part of the initial JSON :type data: dict - :return: title - :rtype: str + :return: title or None if it was not found + :rtype: str or NoneType """ if 'title' in data: return fix_string(data.get('title').get('title')) @@ -421,8 +421,8 @@ def cds_id_extraction(data): :param data: 'CDS' part of the initial JSON :type data: dict - :return: CDS id - :rtype: int + :return: CDS id or None if it was not found + :rtype: int or NoneType """ if 'recid' in data: return int(data.get('recid')) @@ -434,8 +434,8 @@ def creation_date_extraction(data): :param data: 'CDS' part of the initial JSON :type data: dict - :return: creation date - :rtype: str + :return: creation date or None if it was not found + :rtype: str or NoneType """ if 'creation_date' in data: return fix_string(data.get('creation_date')) @@ -447,8 +447,8 @@ def arxiv_extraction(data): :param data: 'CDS' part of the initial JSON :type data: dict - :return: arXiv code - :rtype: str + :return: arXiv code or None if it was not found + :rtype: str or NoneType """ if 'primary_report_number' in data: report_number = data.get('primary_report_number')